[
  {
    "path": ".gitignore",
    "content": "*.pyc\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2016 YerevaNN\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# Spoken language identification with deep learning\n\nRead more in the following blog posts:\n\n* [About TopCoder contest and our CNN-based solution implemented in Caffe](http://yerevann.github.io/2015/10/11/spoken-language-identification-with-deep-convolutional-networks/) (October 2015)\n* [About combining CNN and RNN using Theano/Lasagne](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/) (June 2016)\n\nTheano/Lasagne models are [here](/theano). The basic steps to run them are:\n\n* Download the dataset from [here](https://community.topcoder.com/longcontest/?module=ViewProblemStatement&rd=16555&pm=13978) or use your own dataset.\n* Create spectrograms for recording using `create_spectrograms.py` or `augment_data.py`. The latter will also augment the data by randomly perturbing the spectrograms and cropping a random interval of length 9s from the recording. \n* Create listfiles for training set and validation set, where each row of the a listfile describes one example and has 2 values seperated by a comma. The first one is the name of the example, the second one is the label (counting starts from 0). A typical listfile will look like [this](https://gist.github.com/Harhro94/aa11fe6b454c614cdedea882fd00f8d7).\n* Change the `png_folder` and listfile paths in [`theano/main.py`](/theano/main.py).\n* Run `theano/main.py`."
  },
  {
    "path": "augment_data.py",
    "content": "import numpy as np\nfrom matplotlib import pyplot as plt\nimport scipy.io.wavfile as wav\nfrom numpy.lib import stride_tricks\nimport PIL.Image as Image\nimport os\n\n\"\"\" short time fourier transform of audio signal \"\"\"\ndef stft(sig, frameSize, overlapFac=0.5, window=np.hanning):\n    win = window(frameSize)\n    hopSize = int(frameSize - np.floor(overlapFac * frameSize))\n    \n    # zeros at beginning (thus center of 1st window should be for sample nr. 0)\n    samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)    \n    # cols for windowing\n    cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1\n    # zeros at end (thus samples can be fully covered by frames)\n    samples = np.append(samples, np.zeros(frameSize))\n    \n    frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()\n    frames *= win\n    \n    return np.fft.rfft(frames)    \n    \n\"\"\" scale frequency axis logarithmically \"\"\"    \ndef logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):\n    spec = spec[:, 0:256]\n    timebins, freqbins = np.shape(spec)\n    scale = np.linspace(0, 1, freqbins) #** factor\n    \n    # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310\n    scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale))\n    scale *= (freqbins-1)/max(scale)\n\n    newspec = np.complex128(np.zeros([timebins, freqbins]))\n    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])\n    freqs = [0.0 for i in range(freqbins)]\n    totw = [0.0 for i in range(freqbins)]\n    for i in range(0, freqbins):\n        if (i < 1 or i + 1 >= freqbins):\n            newspec[:, i] += spec[:, i]\n            freqs[i] += allfreqs[i]\n            totw[i] += 1.0\n            continue\n        else:\n            # scale[15] = 17.2\n            w_up = scale[i] - np.floor(scale[i])\n            w_down = 1 - w_up\n            j = int(np.floor(scale[i]))\n           \n            newspec[:, j] += w_down * spec[:, i]\n            freqs[j] += w_down * allfreqs[i]\n            totw[j] += w_down\n            \n            newspec[:, j + 1] += w_up * spec[:, i]\n            freqs[j + 1] += w_up * allfreqs[i]\n            totw[j + 1] += w_up\n    \n    for i in range(len(freqs)):\n        if (totw[i] > 1e-6):\n            freqs[i] /= totw[i]\n    \n    return newspec, freqs\n\n\"\"\" plot spectrogram\"\"\"\ndef plotstft(audiopath, binsize=2**10, plotpath=None, colormap=\"gray\", channel=0, name='tmp.png', alpha=1, offset=0):\n    samplerate, samples = wav.read(audiopath)\n    samples = samples[:, channel]\n    s = stft(samples, binsize)\n\n    sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)\n    sshow = sshow[2:, :]\n    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel\n    timebins, freqbins = np.shape(ims)\n    \n    ims = np.transpose(ims)\n    ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval\n    #print \"ims.shape\", ims.shape\n    \n    image = Image.fromarray(ims) \n    image = image.convert('L')\n    image.save(name)\n\n\nfile = open('trainingData.csv', 'r')\nfor iter, line in enumerate(file.readlines()[1:]): # first line of traininData.csv is header (only for trainingData.csv)\n    filepath = line.split(',')[0]\n    filename = filepath[:-4]\n    wavfile = 'tmp.wav'\n    os.system('mpg123 -w ' + wavfile + ' 
/home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)\n    for augmentIdx in range(0, 20):\n        alpha = np.random.uniform(0.9, 1.1)\n        offset = np.random.randint(90)\n        plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',\n                 alpha=alpha, offset=offset)\n\n    os.remove(wavfile)\n    print \"processed %d files\" % (iter + 1)\n    \n"
  },
  {
    "path": "choose_equal_split.py",
    "content": "\"\"\"split data into training and validation sets\"\"\"\nimport csv\n\nwith open('trainingData.csv', 'rb') as csvfile:\n    next(csvfile) #skip headers\n    data = list(csv.reader(csvfile, delimiter=','))\n\n    #Map every language to an ID\n    langs = set([language.strip() for _,language in data])\n    ID = {lang: i for i,lang in enumerate(sorted(langs))}\n\n    #Write first 306 items to training set and the rest to validation set\n    cnt = [0 for _ in range(len(langs))]\n    with open('trainEqual.csv', 'w') as train:\n        with open('valEqaul.csv', 'w') as val:\n            for line in data:\n                filepath, language = map(str.strip, line)\n                id_lang = ID[language]\n\n                if (cnt[id_lang] < 306):\n                    train.write(filepath[:-4] + ',' + str(id_lang) + '\\n')\n                else:\n                    val.write(filepath[:-4] + ',' + str(id_lang) + '\\n')\n                cnt[id_lang] += 1\n"
  },
  {
    "path": "concatenate_csvs.py",
    "content": "\"\"\" Usage: python concatenate_csvs.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.argv) - 1\ncnt = 12320\n\ncsv = []\nfor index in range(1, len(sys.argv)):\n    csv.append(open(sys.argv[index], 'r'))\n    \noutfile = open('concatenated.csv', 'w')\n\nfor iter in range(12320):\n    out = []\n    for index in range(n_csv):\n        cur_out = csv[index].readline().split(',')\n        cur_out = [float(x) for x in cur_out]\n        out += cur_out\n\n    out = [(\"%.6f\" % x) for x in out]\n    outfile.write(','.join(out) + '\\n')"
  },
  {
    "path": "create_spectrograms.py",
    "content": "import numpy as np\nfrom matplotlib import pyplot as plt\nimport scipy.io.wavfile as wav\nfrom numpy.lib import stride_tricks\nimport PIL.Image as Image\nimport os\n\n\"\"\" short time fourier transform of audio signal \"\"\"\ndef stft(sig, frameSize, overlapFac=0.5, window=np.hanning):\n    win = window(frameSize)\n    hopSize = int(frameSize - np.floor(overlapFac * frameSize))\n    \n    # zeros at beginning (thus center of 1st window should be for sample nr. 0)\n    samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)    \n    # cols for windowing\n    cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1\n    # zeros at end (thus samples can be fully covered by frames)\n    samples = np.append(samples, np.zeros(frameSize))\n    \n    frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()\n    frames *= win\n    \n    return np.fft.rfft(frames)    \n    \n\"\"\" scale frequency axis logarithmically \"\"\"    \ndef logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):\n    spec = spec[:, 0:256]\n    timebins, freqbins = np.shape(spec)\n    scale = np.linspace(0, 1, freqbins) #** factor\n    \n    # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310\n    scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale))\n    scale *= (freqbins-1)/max(scale)\n\n    newspec = np.complex128(np.zeros([timebins, freqbins]))\n    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])\n    freqs = [0.0 for i in range(freqbins)]\n    totw = [0.0 for i in range(freqbins)]\n    for i in range(0, freqbins):\n        if (i < 1 or i + 1 >= freqbins):\n            newspec[:, i] += spec[:, i]\n            freqs[i] += allfreqs[i]\n            totw[i] += 1.0\n            continue\n        else:\n            # scale[15] = 17.2\n            w_up = scale[i] - np.floor(scale[i])\n            w_down = 1 - w_up\n            j = int(np.floor(scale[i]))\n           \n            newspec[:, j] += w_down * spec[:, i]\n            freqs[j] += w_down * allfreqs[i]\n            totw[j] += w_down\n            \n            newspec[:, j + 1] += w_up * spec[:, i]\n            freqs[j + 1] += w_up * allfreqs[i]\n            totw[j + 1] += w_up\n    \n    for i in range(len(freqs)):\n        if (totw[i] > 1e-6):\n            freqs[i] /= totw[i]\n    \n    return newspec, freqs\n\n\"\"\" plot spectrogram\"\"\"\ndef plotstft(audiopath, binsize=2**10, plotpath=None, colormap=\"gray\", channel=0, name='tmp.png', alpha=1, offset=0):\n    samplerate, samples = wav.read(audiopath)\n    samples = samples[:, channel]\n    s = stft(samples, binsize)\n\n    sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)\n    sshow = sshow[2:, :]\n    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel\n    timebins, freqbins = np.shape(ims)\n    \n    ims = np.transpose(ims)\n    # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval\n    ims = ims[0:256, :] # 0-11khz, ~10s interval\n    #print \"ims.shape\", ims.shape\n    \n    image = Image.fromarray(ims) \n    image = image.convert('L')\n    image.save(name)\n\n\nfile = open('trainingData.csv', 'r')\nfor iter, line in enumerate(file.readlines()[1:]): # first line of traininData.csv is header (only for trainingData.csv)\n    filepath = line.split(',')[0]\n    filename = filepath[:-4]\n    wavfile = 
'tmp.wav'\n    os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)\n    \"\"\"\n    for augmentIdx in range(0, 20):\n        alpha = np.random.uniform(0.9, 1.1)\n        offset = np.random.randint(90)\n        plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',\n                 alpha=alpha, offset=offset)\n    \"\"\"\n    # we create only one spectrogram for each speach sample\n    # we don't do vocal tract length perturbation (alpha=1.0)\n    # also we don't crop 9s part from the speech\n    plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.png', alpha=1.0)\n    os.remove(wavfile)\n    print \"processed %d files\" % (iter + 1)"
  },
  {
    "path": "ensembling/ensemble.theano.py",
    "content": "\"\"\" Usage: python ensemble.theano.py model1 [another_model]*\n    \nfor GPU mode\n    1. export PATH=$PATH:/usr/local/cuda-6.5/bin\n    2. THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags='-arch=sm_30' python ensemble.theano.py model1 [another_model]*\n\"\"\"\n\nimport cPickle as pickle\nimport sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\ndef get_score(probs, label):\n    pred = sorted([(x, it) for it, x in enumerate(probs)], reverse=True)\n    if (pred[0][1] == label):\n        return 1000\n    if (pred[1][1] == label):\n        return 400\n    if (pred[2][1] == label): \n        return 160\n    return 0\n    \ndef get_full_score(preds, labels):\n    topCoderScore = 0.0\n    for i in range(len(labels)):\n        topCoderScore += get_score(preds[i], labels[i])  \n    \n    return topCoderScore / len(labels) * 3520\n\n####################### COLLECTING INFO ABOUT LANGS ############################\nfile = open('../trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\nfile.close()\n\nn_models = len(sys.argv) - 1\nX = np.zeros((12320, n_models * 176), dtype=np.float32)\nfor iter in range(n_models):\n    csvpath = 'probs/val/' + sys.argv[iter + 1]\n    csv = open(csvpath, 'r')\n    for row_id, line in enumerate(csv.readlines()):\n        mas = line.split(',')\n        mas = np.array([float(x) for x in mas], dtype=np.float32)\n        X[row_id, 176*iter:176*(iter+1)] = mas\n    csv.close()\n    \nY = []\nlabel_file = open('../valEqual.csv')\nfor line in label_file.readlines():\n    Y.append(int(line.split(',')[1]))\nlabel_file.close()\n\nprint \"X.shape =\", X.shape\nprint \"len(Y) =\", len(Y)\n\nfor iter in range(n_models):\n    print \"score of model %d = %f\" % (iter+1, get_full_score(X[:, 176*iter:176*(iter+1)], Y))\n\n\n######################### TRAINING ENSEMBLING MODEL ############################\nimport theano\nimport theano.tensor as T\nimport lasagne\nimport lasagne.layers as layers\n\nn_train_examples = 10000\nX = X.astype(theano.config.floatX)\ntrainX = X[:n_train_examples]\ntrainY = Y[:n_train_examples]\nvalX = X[n_train_examples:]\nvalY = Y[n_train_examples:]\n\ninput_var = T.matrix('X')\ntarget_var = T.ivector('y')\n\nfrom lasagne.nonlinearities import softmax, sigmoid, rectify\nnetwork = lasagne.layers.InputLayer((None, X.shape[1]), input_var)\nnetwork = lasagne.layers.DenseLayer(network, 4000, nonlinearity=rectify)\nnetwork = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 176, nonlinearity=softmax)\n\nprediction = lasagne.layers.get_output(network)\nloss = lasagne.objectives.categorical_crossentropy(prediction, target_var)\nloss = loss.mean() + 0 * lasagne.regularization.regularize_network_params(\n        network, lasagne.regularization.l2)\n\nparams = lasagne.layers.get_all_params(network, trainable=True)\nlearning_rate = theano.shared(np.float32(0.2))\nupdates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9)\ntrain_fn = theano.function([input_var, target_var], loss, updates=updates)\nvalidation_fn = theano.function([input_var, target_var], loss)\n\nfor epoch in range(1000):\n    train_loss = train_fn(trainX, trainY)\n    val_loss = validation_fn(valX, valY)\n    print \"Epoch %d: train_loss = %f, val_loss = %f, lr = %f\" % (epoch + 1, train_loss, val_loss, learning_rate.get_value())\n    if (epoch > 0 and epoch 
% 200 == 0):\n        learning_rate.set_value(np.float32(learning_rate.get_value() * 0.7))\n\ntest_prediction = lasagne.layers.get_output(network, deterministic=True)\npredict_fn = theano.function([input_var], test_prediction)\nall_predictions = predict_fn(valX)\n\nscore = 0.0\nfor probs, label in zip(all_predictions, valY):\n    score += get_score(probs, label)\nprint \"Final score on ensembling validation = %f\" % score\nprint \"Expected score = %f\" % (score / len(valY) * 3520)\n\n\nprint \"\\n\\n==> creating submission...\"\nX = np.zeros((12320, n_models * 176), dtype=np.float32)\nfor iter in range(n_models):\n    csvpath = 'probs/test/' + sys.argv[iter + 1]\n    csv = open(csvpath, 'r')\n    for row_id, line in enumerate(csv.readlines()):\n        mas = line.split(',')\n        mas = np.array([float(x) for x in mas], dtype=np.float32)\n        X[row_id, 176*iter:176*(iter+1)] = mas\n    csv.close()\n\nprediction = predict_fn(X)\nprint \"prediction.shape =\", prediction.shape\nensembled = open('ensembled.csv', 'w')\nfor probs in prediction:\n    out = [str(x) for x in probs]\n    ensembled.write(','.join(out) + '\\n')\n\n\n\"\"\"\n######################### SAVING MODEL TO BE ABLE TO REPRODUCE #################\nprint \"==> Saving model...\"\nwith open(\"model.pickle\", 'w') as save_file:\n\tpickle.dump(obj = {'params' : layers.get_all_param_values(network)}, file = save_file, protocol = -1)\n\"\"\"\n"
  },
  {
    "path": "ensembling/get_output_layers.py",
    "content": "\"\"\" Usage: python get_output_layers.py test|val\n\"\"\"\nimport sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\ndeploy = '../prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt'\nmodel = 'augm_dropout0.3_on_augm84K-lr0.01_30K_iter_75000'\nmodel_path = '../models/' + model + '.caffemodel'\n\n\"\"\"\n####################### networks with no augmentation ##########################\nnet = caffe.Classifier(deploy, model_path)\ntransformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\ntransformer.set_transpose('data', (2, 0, 1))\nnet.blobs['data'].reshape(1, 1, 256, 858)\n\nfolder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'\ncnt = 12320\nfile = open('../valEqual.csv', 'r')\nprob_file = open('probs/val/' + model + '.csv', 'w')\n\nfor iter in range(cnt):\n    name = file.readline().split(',')[0]\n    net.blobs['data'].data[...] = transformer.preprocess('data', \n            caffe.io.load_image(folder + name + '.png', color=False))\n    probs = net.forward()['loss'][0]\n    probs = [str(x) for x in probs]\n    prob_file.write(','.join(probs) + '\\n')\n    \n    if (iter % 100 == 0):\n        print \"processed %d images\" % (iter + 1)\n\"\"\"\n\n######################### networks with augmentation ###########################\nassert sys.argv[1] in ('test', 'val')\ndataset = sys.argv[1]\naugm_cnt = 20\ncnt = 12320\n\nif (dataset == 'val'):\n    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'\n    file = open('../valEqual.csv', 'r')\nelse:\n    folder = '../test/pngaugm/'\n    file = open('../testingData.csv', 'r')\n\n# sum - mean of augm_cnt versions of speech\n# log - mean of logs of augm_cnt versions of speech\n# dense - last dense layer, 1024 outputs\nprob_file_sum = open('probs/' + dataset + '/' + model + '.sum' + str(augm_cnt) + '.csv', 'w')\nprob_file_log = open('probs/' + dataset + '/' + model + '.log' + str(augm_cnt) + '.csv', 'w')\ndense_file = open('probs/' + dataset + '/'+ model + '.dense' + str(augm_cnt) + '.csv', 'w')\n\nnet = caffe.Classifier(deploy, model_path)\ntransformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\ntransformer.set_transpose('data', (2, 0, 1))\n\nnet.blobs['data'].reshape(augm_cnt, 1, 256, 768)\nfor iter in range(cnt):\n    if (dataset == 'val'):\n        name = file.readline().split(',')[0]\n    else:\n        name = file.readline().strip()[:-4]\n    X = np.zeros((augm_cnt, 1, 256, 768), dtype=np.float32)\n    for index in range(augm_cnt):\n        augm_path = folder + name + '.' + str(index) + '.png'\n        X[index] = transformer.preprocess('data', caffe.io.load_image(augm_path, color=False))\n\n    net.blobs['data'].data[...] = X\n    out = net.forward()['loss']\n    probs_sum = out.mean(axis=0)\n    probs_log = np.log(out + 1e-7).mean(axis=0)\n    dense = net.blobs['ip2new'].data\n    \n    probs_sum = [str(x) for x in probs_sum]\n    prob_file_sum.write(','.join(probs_sum) + '\\n')\n    \n    probs_log = [\"%f\" % x for x in probs_log]\n    prob_file_log.write(','.join(probs_log) + '\\n')\n    \n    for index in range(augm_cnt):\n        tmp = [str(x) for x in dense[index]]\n        dense_file.write(','.join(tmp) + '\\n')\n    \n    if (iter % 10 == 0):\n        print \"processed %d images\" % (iter + 1)\n"
  },
  {
    "path": "get_score_from_probabilities.py",
    "content": "\"\"\" USAGE: python get_score_from_probabilities.py --prediction= --anwser=\n    prediction file may have less lines\n\"\"\"\nimport sys\nimport numpy as np\nimport argparse\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--prediction', type=str)\nparser.add_argument('--answer', type=str, default='valDataNew.csv')\nargs = parser.parse_args()\nprint args\n\n\n# info about classes\nfile = open('trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\n\n\nprediction_file = open(args.prediction, 'r')\nprediction_lines = prediction_file.readlines()\nanswer_file = open(args.answer, 'r')\nanswer_lines = answer_file.readlines()\ncnt = len(prediction_lines)\ntop_coder_score = 0.0\ncorrect = 0\n\nwrong_answers = open('wrong_answers.txt', 'w')\n\nfor iter in range(cnt):\n    st = answer_lines[iter]\n    (name, label) = st.split(',')\n    label = int(label)\n\n    out = prediction_lines[iter].split(',')\n    out = [float(x) for x in out]\n    pred = [(x, it) for it, x in enumerate(out)]\n    pred = sorted(pred, reverse=True)\n\n    if (pred[0][1] == label):\n        correct += 1\n        top_coder_score = top_coder_score + 1000\n    elif (pred[1][1] == label):\n        #correct += 1\n        top_coder_score = top_coder_score + 400\n    elif (pred[2][1] == label): \n        #correct += 1\n        top_coder_score = top_coder_score + 160\n\n    if (pred[0][1] != label):\n        print >> wrong_answers, answer_lines[iter] + prediction_lines[iter]\n    \n    if ((iter + 1) % 100 == 0):\n        print >> sys.stderr, \"processed %d / %d images\" % (iter + 1, cnt)\n        print >> sys.stderr, \"expected score:\", top_coder_score / (iter + 1) * 35200\n\nprint >> sys.stderr, \"Final score: \", top_coder_score, \" / \", cnt, \"000\"\nprint >> sys.stderr, \"expected score:\", top_coder_score / cnt * 35200\nprint >> sys.stderr, \"Accuracy: \", 100.0 * correct / cnt"
  },
  {
    "path": "get_score_from_top3_prediction.py",
    "content": "\"\"\" USAGE: python get_score_fromcsv.py --prediction= --anwser=\n   \n    Prediction file may have less lines\n    \n    Each line of prediction file must contain at least 3 integers: labels of top3\n    predictions, then it may have some additional information\n\"\"\"\nimport sys\nimport numpy as np\nimport argparse\n\nparser = argparse.ArgumentParser()\nparser.add_argument('--prediction', type=str)\nparser.add_argument('--answer', type=str, default='valDataNew.csv')\nargs = parser.parse_args()\nprint args\n\n\n# info about classes\nfile = open('trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\n\n\nprediction_file = open(args.prediction, 'r')\nprediction_lines = prediction_file.readlines()\nanswer_file = open(args.answer, 'r')\nanswer_lines = answer_file.readlines()\ncnt = len(prediction_lines)\ntop_coder_score = 0.0\ncorrect = 0\n\nwrong_answers = open('wrong_answers.txt', 'w')\n\nfor iter in range(cnt):\n    st = answer_lines[iter]\n    (name, label) = st.split(',')\n    label = int(label)\n\n    pred = prediction_lines[iter].split(',')\n    pred = [int(x) for x in pred]\n\n    if (pred[0] == label):\n        correct += 1\n        top_coder_score = top_coder_score + 1000\n    elif (pred[1] == label):\n        #correct += 1\n        top_coder_score = top_coder_score + 400\n    elif (pred[2] == label):\n        #correct += 1\n        top_coder_score = top_coder_score + 160\n\n    if (pred[0] != label):\n        print >> wrong_answers, (answer_lines[iter] + str(pred[3 + pred[0]]) + ',' + str(pred[3 + pred[1]]) + ',' + \n            str(pred[3 + pred[2]]) + ', votes for correct answer: ' + str(pred[3 + label])) \n\n    if ((iter + 1) % 100 == 0):\n        print >> sys.stderr, \"processed %d / %d images\" % (iter + 1, cnt)\n        print >> sys.stderr, \"expected score:\", top_coder_score / (iter + 1) * 35200\n\nprint >> sys.stderr, \"Final score: \", top_coder_score, \" / \", cnt, \"000\"\nprint >> sys.stderr, \"expected score:\", top_coder_score / cnt * 35200\nprint >> sys.stderr, \"Accuracy: \", 100.0 * correct / cnt"
  },
  {
    "path": "get_sum_of_csvs.py",
    "content": "\"\"\" Usage: python get_sum_csvs.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.argv) - 1\ncnt = 12320\n\ncsv = []\nfor index in range(1, len(sys.argv)):\n    csv.append(open(sys.argv[index], 'r'))\n    \noutfile = open('summed.csv', 'w')\n\nfor iter in range(12320):\n    out = np.zeros((176,), dtype=np.float32)\n    for index in range(n_csv):\n        cur_out = csv[index].readline().split(',')\n        cur_out = [float(x) for x in cur_out]\n        out += cur_out\n    \n    out = [(\"%.6f\" % x) for x in out]\n    outfile.write(','.join(out) + '\\n')"
  },
  {
    "path": "majority_vote_ensembling.py",
    "content": "\"\"\" Usage: python majority_vote_ensembling.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.argv) - 1\ntrain_cnt = 12320\n\ncsv = []\nfor index in range(1, len(sys.argv)):\n    csv.append(open(sys.argv[index], 'r'))\n    \nensembled = open('top3_prediction_ensembled.csv', 'w')\n\nfor iter in range(train_cnt):\n    cnt = [0 for i in range(176)]\n    avg_prob = np.array([0.0 for i in range(176)])\n\n    for index in range(n_csv):\n        cur_prob = csv[index].readline().split(',')\n        cur_prob = np.array([float(x) for x in cur_prob])\n        \n        avg_prob += cur_prob\n        prediction = cur_prob.argmax()\n        cnt[prediction] += 1\n\n\n    mas = [(cnt[index], avg_prob[index], index) for index in range(176)]\n    mas = sorted(mas, reverse=True)\n    \n    ensembled.write(str(mas[0][2]) + ',' + str(mas[1][2]) + ',' + str(mas[2][2]) + ',')\n    ensembled.write(','.join([str(x) for x in cnt]) + '\\n')\n    "
  },
  {
    "path": "make_submission.py",
    "content": "\"\"\" Usage: python make_submission.py csvpath model_name\ncsv - must contain 12320 rows, 176 coloumns: the predictions for test set\n\"\"\"\n\nimport sys\nimport numpy as np\n\n# info about classes\nfile = open('trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\n\npath = sys.argv[1]\nname = sys.argv[2]\nread_file = open(path, 'r')\nf = open('testingData.csv')\ncnt = 12320\nprint_file = open('predictions/test_' + name + '.csv', 'w')\n\nfor iter in range(cnt):\n    st = f.readline()\n    name = st.strip()[:-4]\n    \n    out = read_file.readline().split(',')\n    out = [float(x) for x in out]\n    pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)\n\n    for i in range(3):\n        lang_id = pred[i][1]\n        lang = langs[lang_id]\n        print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\\n')\n\n    if (iter % 100 == 0):\n        print >> sys.stderr, \"processed %d / %d images\" % (iter + 1, cnt)\n"
  },
  {
    "path": "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt",
    "content": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TRAIN\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/train_augm_db\"\n    batch_size: 24\n    backend: LEVELDB\n  }\n}\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TEST\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/val_augm_db\"\n    batch_size: 24\n    backend: LEVELDB\n  }\n}\n\n# CONV1-RELU1-POOL1\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bottom: \"data\"\n  top: \"conv1\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 32\n    kernel_size: 7\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu1\"\n  type: \"ReLU\"\n  bottom: \"conv1\"\n  top: \"conv1\"\n}\nlayer {\n  name: \"pool1\"\n  type: \"Pooling\"\n  bottom: \"conv1\"\n  top: \"pool1\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV2-RELU2-POOL2_\nlayer {\n  name: \"conv2\"\n  type: \"Convolution\"\n  bottom: \"pool1\"\n  top: \"conv2\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 5\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu2\"\n  type: \"ReLU\"\n  bottom: \"conv2\"\n  top: \"conv2\"\n}\nlayer {\n  name: \"pool2\"\n  type: \"Pooling\"\n  bottom: \"conv2\"\n  top: \"pool2\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV3-RELU3-POOL3\nlayer {\n  name: \"conv3\"\n  type: \"Convolution\"\n  bottom: \"pool2\"\n  top: \"conv3\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu3\"\n  type: \"ReLU\"\n  bottom: \"conv3\"\n  top: \"conv3\"\n}\nlayer {\n  name: \"pool3\"\n  type: \"Pooling\"\n  bottom: \"conv3\"\n  top: \"pool3\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV4-RELU4-POOL4\nlayer {\n  name: \"conv4\"\n  type: \"Convolution\"\n  bottom: \"pool3\"\n  top: \"conv4\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu4\"\n  type: \"ReLU\"\n  bottom: \"conv4\"\n  top: \"conv4\"\n}\nlayer {\n  name: \"pool4\"\n  type: \"Pooling\"\n  bottom: \"conv4\"\n  top: \"pool4\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV5-RELU5-POOL5\nlayer {\n  name: \"conv5\"\n  type: \"Convolution\"\n  bottom: \"pool4\"\n  top: \"conv5\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu5\"\n  type: \"ReLU\"\n  bottom: \"conv5\"\n  top: \"conv5\"\n}\nlayer {\n  name: 
\"pool5\"\n  type: \"Pooling\"\n  bottom: \"conv5\"\n  top: \"pool5\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV6-RELU6-POOL6\nlayer {\n  name: \"conv6\"\n  type: \"Convolution\"\n  bottom: \"pool5\"\n  top: \"conv6\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 256\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu6\"\n  type: \"ReLU\"\n  bottom: \"conv6\"\n  top: \"conv6\"\n}\nlayer {\n  name: \"pool6\"\n  type: \"Pooling\"\n  bottom: \"conv6\"\n  top: \"pool6\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# IP layers\nlayer {\n  name: \"ip1new\"\n  type: \"InnerProduct\"\n  bottom: \"pool6\"\n  top: \"ip1new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp1\"\n  type: \"ReLU\"\n  bottom: \"ip1new\"\n  top: \"ip1new\"\n}\nlayer {\n  name: \"ip2new\"\n  type: \"InnerProduct\"\n  bottom: \"ip1new\"\n  top: \"ip2new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp2\"\n  type: \"ReLU\"\n  bottom: \"ip2new\"\n  top: \"ip2new\"\n}\nlayer {\n  name: \"ip3new\"\n  type: \"InnerProduct\"\n  bottom: \"ip2new\"\n  top: \"ip3new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 176\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"accuracy\"\n  type: \"Accuracy\"\n  bottom: \"ip3new\"\n  bottom: \"label\"\n  top: \"accuracy\"\n  include {\n    phase: TEST\n  }\n}\nlayer {\n  name: \"loss\"\n  type: \"SoftmaxWithLoss\"\n  bottom: \"ip3new\"\n  bottom: \"label\"\n  top: \"loss\"\n}\n"
  },
  {
    "path": "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt",
    "content": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TRAIN\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/train_augm_db\"\n    batch_size: 23\n    backend: LEVELDB\n  }\n}\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TEST\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/val_augm_db\"\n    batch_size: 24\n    backend: LEVELDB\n  }\n}\n\n# CONV1-RELU1-POOL1\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bottom: \"data\"\n  top: \"conv1\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 32\n    kernel_size: 7\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu1\"\n  type: \"ReLU\"\n  bottom: \"conv1\"\n  top: \"conv1\"\n}\nlayer {\n  name: \"pool1\"\n  type: \"Pooling\"\n  bottom: \"conv1\"\n  top: \"pool1\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV2-RELU2-POOL2_\nlayer {\n  name: \"conv2\"\n  type: \"Convolution\"\n  bottom: \"pool1\"\n  top: \"conv2\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 5\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu2\"\n  type: \"ReLU\"\n  bottom: \"conv2\"\n  top: \"conv2\"\n}\nlayer {\n  name: \"pool2\"\n  type: \"Pooling\"\n  bottom: \"conv2\"\n  top: \"pool2\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV3-RELU3-POOL3\nlayer {\n  name: \"conv3\"\n  type: \"Convolution\"\n  bottom: \"pool2\"\n  top: \"conv3\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu3\"\n  type: \"ReLU\"\n  bottom: \"conv3\"\n  top: \"conv3\"\n}\nlayer {\n  name: \"pool3\"\n  type: \"Pooling\"\n  bottom: \"conv3\"\n  top: \"pool3\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV4-RELU4-POOL4\nlayer {\n  name: \"conv4\"\n  type: \"Convolution\"\n  bottom: \"pool3\"\n  top: \"conv4\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu4\"\n  type: \"ReLU\"\n  bottom: \"conv4\"\n  top: \"conv4\"\n}\nlayer {\n  name: \"pool4\"\n  type: \"Pooling\"\n  bottom: \"conv4\"\n  top: \"pool4\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV5-RELU5-POOL5\nlayer {\n  name: \"conv5\"\n  type: \"Convolution\"\n  bottom: \"pool4\"\n  top: \"conv5\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu5\"\n  type: \"ReLU\"\n  bottom: \"conv5\"\n  top: \"conv5\"\n}\nlayer {\n  name: 
\"pool5\"\n  type: \"Pooling\"\n  bottom: \"conv5\"\n  top: \"pool5\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV6-RELU6-POOL6\nlayer {\n  name: \"conv6\"\n  type: \"Convolution\"\n  bottom: \"pool5\"\n  top: \"conv6\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 256\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu6\"\n  type: \"ReLU\"\n  bottom: \"conv6\"\n  top: \"conv6\"\n}\nlayer {\n  name: \"pool6\"\n  type: \"Pooling\"\n  bottom: \"conv6\"\n  top: \"pool6\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# IP layers\nlayer {\n  name: \"ip1new\"\n  type: \"InnerProduct\"\n  bottom: \"pool6\"\n  top: \"ip1new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp1\"\n  type: \"ReLU\"\n  bottom: \"ip1new\"\n  top: \"ip1new\"\n}\nlayer {\n  name: \"dropOnIp1\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.3\n  }\n  bottom: \"ip1new\"\n  top: \"ip1new\"\n}\nlayer {\n  name: \"ip2new\"\n  type: \"InnerProduct\"\n  bottom: \"ip1new\"\n  top: \"ip2new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp2\"\n  type: \"ReLU\"\n  bottom: \"ip2new\"\n  top: \"ip2new\"\n}\nlayer {\n  name: \"dropOnIp2\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.3\n  }\n  bottom: \"ip2new\"\n  top: \"ip2new\"\n}\nlayer {\n  name: \"ip3new\"\n  type: \"InnerProduct\"\n  bottom: \"ip2new\"\n  top: \"ip3new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 176\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"accuracy\"\n  type: \"Accuracy\"\n  bottom: \"ip3new\"\n  bottom: \"label\"\n  top: \"accuracy\"\n  include {\n    phase: TEST\n  }\n}\nlayer {\n  name: \"loss\"\n  type: \"SoftmaxWithLoss\"\n  bottom: \"ip3new\"\n  bottom: \"label\"\n  top: \"loss\"\n}\n"
  },
  {
    "path": "prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt",
    "content": "name: \"LangNet\"\n# DATA LAYERS\ninput: \"data\"\ninput_dim: 1\ninput_dim: 1\ninput_dim: 256\ninput_dim: 768\n\n# CONV1-RELU1-POOL1\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bottom: \"data\"\n  top: \"conv1\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 32\n    kernel_size: 7\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu1\"\n  type: \"ReLU\"\n  bottom: \"conv1\"\n  top: \"conv1\"\n}\nlayer {\n  name: \"pool1\"\n  type: \"Pooling\"\n  bottom: \"conv1\"\n  top: \"pool1\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV2-RELU2-POOL2_\nlayer {\n  name: \"conv2\"\n  type: \"Convolution\"\n  bottom: \"pool1\"\n  top: \"conv2\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 5\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu2\"\n  type: \"ReLU\"\n  bottom: \"conv2\"\n  top: \"conv2\"\n}\nlayer {\n  name: \"pool2\"\n  type: \"Pooling\"\n  bottom: \"conv2\"\n  top: \"pool2\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV3-RELU3-POOL3\nlayer {\n  name: \"conv3\"\n  type: \"Convolution\"\n  bottom: \"pool2\"\n  top: \"conv3\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu3\"\n  type: \"ReLU\"\n  bottom: \"conv3\"\n  top: \"conv3\"\n}\nlayer {\n  name: \"pool3\"\n  type: \"Pooling\"\n  bottom: \"conv3\"\n  top: \"pool3\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV4-RELU4-POOL4\nlayer {\n  name: \"conv4\"\n  type: \"Convolution\"\n  bottom: \"pool3\"\n  top: \"conv4\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu4\"\n  type: \"ReLU\"\n  bottom: \"conv4\"\n  top: \"conv4\"\n}\nlayer {\n  name: \"pool4\"\n  type: \"Pooling\"\n  bottom: \"conv4\"\n  top: \"pool4\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV5-RELU5-POOL5\nlayer {\n  name: \"conv5\"\n  type: \"Convolution\"\n  bottom: \"pool4\"\n  top: \"conv5\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu5\"\n  type: \"ReLU\"\n  bottom: \"conv5\"\n  top: \"conv5\"\n}\nlayer {\n  name: \"pool5\"\n  type: \"Pooling\"\n  bottom: \"conv5\"\n  top: \"pool5\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV6-RELU6-POOL6\nlayer {\n  name: \"conv6\"\n  type: \"Convolution\"\n  bottom: \"pool5\"\n  top: \"conv6\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 256\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    
bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu6\"\n  type: \"ReLU\"\n  bottom: \"conv6\"\n  top: \"conv6\"\n}\nlayer {\n  name: \"pool6\"\n  type: \"Pooling\"\n  bottom: \"conv6\"\n  top: \"pool6\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# IP layers\nlayer {\n  name: \"ip1new\"\n  type: \"InnerProduct\"\n  bottom: \"pool6\"\n  top: \"ip1new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp1\"\n  type: \"ReLU\"\n  bottom: \"ip1new\"\n  top: \"ip1new\"\n}\nlayer {\n  name: \"dropOnIp1\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.3\n  }\n  bottom: \"ip1new\"\n  top: \"ip1new\"\n}\nlayer {\n  name: \"ip2new\"\n  type: \"InnerProduct\"\n  bottom: \"ip1new\"\n  top: \"ip2new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp2\"\n  type: \"ReLU\"\n  bottom: \"ip2new\"\n  top: \"ip2new\"\n}\nlayer {\n  name: \"dropOnIp2\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.3\n  }\n  bottom: \"ip2new\"\n  top: \"ip2new\"\n}\nlayer {\n  name: \"ip3new\"\n  type: \"InnerProduct\"\n  bottom: \"ip2new\"\n  top: \"ip3new\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 176\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"loss\"\n  type: \"Softmax\"\n  bottom: \"ip3new\"\n  top: \"loss\"\n}\n"
  },
  {
    "path": "prototxt/deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt",
    "content": "name: \"LangNet\"\n# DATA LAYERS\ninput: \"data\"\ninput_dim: 1\ninput_dim: 1\ninput_dim: 256\ninput_dim: 858\n\n# CONV1-RELU1-POOL1\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bottom: \"data\"\n  top: \"conv1\"\n  param {\n    lr_mult: 15\n  }\n  param {\n    lr_mult: 30\n  }\n  convolution_param {\n    num_output: 32\n    kernel_size: 7\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu1\"\n  type: \"ReLU\"\n  bottom: \"conv1\"\n  top: \"conv1\"\n}\nlayer {\n  name: \"pool1\"\n  type: \"Pooling\"\n  bottom: \"conv1\"\n  top: \"pool1\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV2-RELU2-POOL2_\nlayer {\n  name: \"conv2\"\n  type: \"Convolution\"\n  bottom: \"pool1\"\n  top: \"conv2\"\n  param {\n    lr_mult: 12\n  }\n  param {\n    lr_mult: 24\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 5\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu2\"\n  type: \"ReLU\"\n  bottom: \"conv2\"\n  top: \"conv2\"\n}\nlayer {\n  name: \"pool2\"\n  type: \"Pooling\"\n  bottom: \"conv2\"\n  top: \"pool2\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV3-RELU3-POOL3\nlayer {\n  name: \"conv3\"\n  type: \"Convolution\"\n  bottom: \"pool2\"\n  top: \"conv3\"\n  param {\n    lr_mult: 9\n  }\n  param {\n    lr_mult: 18\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu3\"\n  type: \"ReLU\"\n  bottom: \"conv3\"\n  top: \"conv3\"\n}\nlayer {\n  name: \"pool3\"\n  type: \"Pooling\"\n  bottom: \"conv3\"\n  top: \"pool3\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV4-RELU4-POOL4\nlayer {\n  name: \"conv4\"\n  type: \"Convolution\"\n  bottom: \"pool3\"\n  top: \"conv4\"\n  param {\n    lr_mult: 4\n  }\n  param {\n    lr_mult: 8\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu4\"\n  type: \"ReLU\"\n  bottom: \"conv4\"\n  top: \"conv4\"\n}\nlayer {\n  name: \"pool4\"\n  type: \"Pooling\"\n  bottom: \"conv4\"\n  top: \"pool4\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV5-RELU5-POOL5\nlayer {\n  name: \"conv5\"\n  type: \"Convolution\"\n  bottom: \"pool4\"\n  top: \"conv5\"\n  param {\n    lr_mult: 2\n  }\n  param {\n    lr_mult: 4\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu5\"\n  type: \"ReLU\"\n  bottom: \"conv5\"\n  top: \"conv5\"\n}\nlayer {\n  name: \"pool5\"\n  type: \"Pooling\"\n  bottom: \"conv5\"\n  top: \"pool5\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV6-RELU6-POOL6\nlayer {\n  name: \"conv6\"\n  type: \"Convolution\"\n  bottom: \"pool5\"\n  top: \"conv6\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 256\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    
bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu6\"\n  type: \"ReLU\"\n  bottom: \"conv6\"\n  top: \"conv6\"\n}\nlayer {\n  name: \"pool6\"\n  type: \"Pooling\"\n  bottom: \"conv6\"\n  top: \"pool6\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# IP layers\nlayer {\n  name: \"ip1\"\n  type: \"InnerProduct\"\n  bottom: \"pool6\"\n  top: \"ip1\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp1\"\n  type: \"ReLU\"\n  bottom: \"ip1\"\n  top: \"ip1\"\n}\nlayer {\n  name: \"dropOnIp1\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.5\n  }\n  bottom: \"ip1\"\n  top: \"ip1\"\n}\nlayer {\n  name: \"ip2\"\n  type: \"InnerProduct\"\n  bottom: \"ip1\"\n  top: \"ip2\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp2\"\n  type: \"ReLU\"\n  bottom: \"ip2\"\n  top: \"ip2\"\n}\nlayer {\n  name: \"dropOnIp2\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.5\n  }\n  bottom: \"ip2\"\n  top: \"ip2\"\n}\nlayer {\n  name: \"ip3\"\n  type: \"InnerProduct\"\n  bottom: \"ip2\"\n  top: \"ip3\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 176\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"loss\"\n  type: \"Softmax\"\n  bottom: \"ip3\"\n  top: \"loss\"\n}\n"
  },
  {
    "path": "prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt",
    "content": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TRAIN\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/traindb\"\n    batch_size: 32\n    backend: LEVELDB\n  }\n}\nlayer {\n  name: \"mnist\"\n  type: \"Data\"\n  top: \"data\"\n  top: \"label\"\n  include {\n    phase: TEST\n  }\n  transform_param {\n    scale: 0.00390625\n  }\n  data_param {\n    source: \"train/valdb\"\n    batch_size: 1\n    backend: LEVELDB\n  }\n}\n\n# CONV1-RELU1-POOL1\nlayer {\n  name: \"conv1\"\n  type: \"Convolution\"\n  bottom: \"data\"\n  top: \"conv1\"\n  param {\n    lr_mult: 15\n  }\n  param {\n    lr_mult: 30\n  }\n  convolution_param {\n    num_output: 32\n    kernel_size: 7\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu1\"\n  type: \"ReLU\"\n  bottom: \"conv1\"\n  top: \"conv1\"\n}\nlayer {\n  name: \"pool1\"\n  type: \"Pooling\"\n  bottom: \"conv1\"\n  top: \"pool1\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV2-RELU2-POOL2_\nlayer {\n  name: \"conv2\"\n  type: \"Convolution\"\n  bottom: \"pool1\"\n  top: \"conv2\"\n  param {\n    lr_mult: 12\n  }\n  param {\n    lr_mult: 24\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 5\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu2\"\n  type: \"ReLU\"\n  bottom: \"conv2\"\n  top: \"conv2\"\n}\nlayer {\n  name: \"pool2\"\n  type: \"Pooling\"\n  bottom: \"conv2\"\n  top: \"pool2\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride: 2\n  }\n}\n\n# CONV3-RELU3-POOL3\nlayer {\n  name: \"conv3\"\n  type: \"Convolution\"\n  bottom: \"pool2\"\n  top: \"conv3\"\n  param {\n    lr_mult: 9\n  }\n  param {\n    lr_mult: 18\n  }\n  convolution_param {\n    num_output: 64\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu3\"\n  type: \"ReLU\"\n  bottom: \"conv3\"\n  top: \"conv3\"\n}\nlayer {\n  name: \"pool3\"\n  type: \"Pooling\"\n  bottom: \"conv3\"\n  top: \"pool3\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV4-RELU4-POOL4\nlayer {\n  name: \"conv4\"\n  type: \"Convolution\"\n  bottom: \"pool3\"\n  top: \"conv4\"\n  param {\n    lr_mult: 4\n  }\n  param {\n    lr_mult: 8\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu4\"\n  type: \"ReLU\"\n  bottom: \"conv4\"\n  top: \"conv4\"\n}\nlayer {\n  name: \"pool4\"\n  type: \"Pooling\"\n  bottom: \"conv4\"\n  top: \"pool4\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV5-RELU5-POOL5\nlayer {\n  name: \"conv5\"\n  type: \"Convolution\"\n  bottom: \"pool4\"\n  top: \"conv5\"\n  param {\n    lr_mult: 2\n  }\n  param {\n    lr_mult: 4\n  }\n  convolution_param {\n    num_output: 128\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu5\"\n  type: \"ReLU\"\n  bottom: \"conv5\"\n  top: \"conv5\"\n}\nlayer {\n  name: \"pool5\"\n  
type: \"Pooling\"\n  bottom: \"conv5\"\n  top: \"pool5\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# CONV6-RELU6-POOL6\nlayer {\n  name: \"conv6\"\n  type: \"Convolution\"\n  bottom: \"pool5\"\n  top: \"conv6\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  convolution_param {\n    num_output: 256\n    kernel_size: 3\n    stride: 1\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"relu6\"\n  type: \"ReLU\"\n  bottom: \"conv6\"\n  top: \"conv6\"\n}\nlayer {\n  name: \"pool6\"\n  type: \"Pooling\"\n  bottom: \"conv6\"\n  top: \"pool6\"\n  pooling_param {\n    pool: MAX\n    kernel_size: 3\n    stride:2\n  }\n}\n\n# IP layers\nlayer {\n  name: \"ip1\"\n  type: \"InnerProduct\"\n  bottom: \"pool6\"\n  top: \"ip1\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp1\"\n  type: \"ReLU\"\n  bottom: \"ip1\"\n  top: \"ip1\"\n}\nlayer {\n  name: \"dropOnIp1\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.5\n  }\n  bottom: \"ip1\"\n  top: \"ip1\"\n}\nlayer {\n  name: \"ip2\"\n  type: \"InnerProduct\"\n  bottom: \"ip1\"\n  top: \"ip2\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 1024\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"reluOnIp2\"\n  type: \"ReLU\"\n  bottom: \"ip2\"\n  top: \"ip2\"\n}\nlayer {\n  name: \"dropOnIp2\"\n  type: \"Dropout\"\n  dropout_param {\n    dropout_ratio: 0.5\n  }\n  bottom: \"ip2\"\n  top: \"ip2\"\n}\nlayer {\n  name: \"ip3\"\n  type: \"InnerProduct\"\n  bottom: \"ip2\"\n  top: \"ip3\"\n  param {\n    lr_mult: 1\n  }\n  param {\n    lr_mult: 2\n  }\n  inner_product_param {\n    num_output: 176\n    weight_filler {\n      type: \"xavier\"\n    }\n    bias_filler {\n      type: \"constant\"\n    }\n  }\n}\nlayer {\n  name: \"accuracy\"\n  type: \"Accuracy\"\n  bottom: \"ip3\"\n  bottom: \"label\"\n  top: \"accuracy\"\n  include {\n    phase: TEST\n  }\n}\nlayer {\n  name: \"loss\"\n  type: \"SoftmaxWithLoss\"\n  bottom: \"ip3\"\n  bottom: \"label\"\n  top: \"loss\"\n}\n"
  },
  {
    "path": "prototxt/solver.augm.nolrcoef.prototxt",
    "content": "net: \"prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt\"\n\ntest_iter: 512\ntest_interval: 1500\n\n# The base learning rate, momentum and the weight decay of the network.\nbase_lr: 0.01\nweight_decay: 0.0000\n\n# The learning rate policy\n# lr_policy: \"fixed\"\n# solver_type: ADADELTA\n\nlr_policy: \"inv\"\ngamma: 0.0003\npower: 0.9\n\n#lr_policy: \"step\"\n#gamma: 0.9\n#stepsize: 6000\n\ndisplay: 1\n\nmax_iter: 800000\n\nsnapshot: 3000\nsnapshot_prefix: \"models/augm_dropout0.3_on_augm84K-lr0.01_30K_90K\"\n#log: \"logs/augm_dropout0.3_on_augm84K-lr0.01_30K_90K.txt\"\nsolver_mode: GPU\n\n"
  },
  {
    "path": "prototxt/solver.main.adadelta.prototxt",
    "content": "net: \"prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt\"\n\ntest_iter: 100\ntest_interval: 100\n\n# The base learning rate, momentum and the weight decay of the network.\nweight_decay: 0.0000\n\n# The learning rate policy\nbase_lr: 0.01\nlr_policy: \"fixed\"\nsolver_type: ADADELTA\n\ndisplay: 1\n\nmax_iter: 800000\n\nsnapshot: 3000\nsnapshot_prefix: \"models/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01\"\n#log: \"logs/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01.txt\"\nsolver_mode: GPU\n\n"
  },
  {
    "path": "test_augm_network.py",
    "content": "import sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\n# info about classes\nfile = open('trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\n\n\n# network parameters:\ndeploy_name = 'augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3'\nnetwork_name = 'augm_dropout0.3_on_augm84K-lr0.01_30K'\niterations = '90000'\naveSamples = 20 # average over this many samples\n\nnet = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',\n                       pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')\n\nnet.blobs['data'].reshape(1, 1, 256, 768)\npredict_set = sys.argv[1]\n\nif (predict_set == \"test\"):\n    folder = 'test/png/'\n    f = open('testingData.csv')\n    cnt = 12320\n    print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')\nelif (predict_set == \"val\"):\n    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'\n    f = open('valEqual.csv')\n    cnt = 12320\n    print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')\nelse: # train\n    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'\n    f = open('trainEqual.csv')\n    cnt = 10000\n    print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')\n    \npreds = []\nlabels = []\ntopcoder_score = 0.0\nprocessed = 0\n\nfor iter in range(cnt):\n    st = f.readline()\n    if (predict_set == \"val\" or predict_set == \"train\"):\n        (name, label) = st.split(',')\n        label = int(label)\n    else:\n        name = st.strip()[:-4]\n    processed += 1\n    out = np.zeros((176, ))\n    for randomIndex in range(aveSamples):\n        image = caffe.io.load_image(folder + name + '.' + str(randomIndex) + '.png', color=False)\n        image = np.transpose(image, (2, 0, 1))\n        #image = np.concatenate([image, np.zeros((1, 256, 858 - 768), dtype=np.float32)], axis=2)\n        net.blobs['data'].data[...] = image\n        out += net.forward()['loss'][0]\n\n    pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)\n    \n    if (predict_set == \"val\" or predict_set == \"train\"):\n        if (pred[0][1] == label):\n            topcoder_score = topcoder_score + 1000\n        elif (pred[1][1] == label):\n            topcoder_score = topcoder_score + 400\n        elif (pred[2][1] == label): \n            topcoder_score = topcoder_score + 160\n    \n    for i in range(3):\n        lang_id = pred[i][1]\n        lang = langs[lang_id]\n        print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\\n')\n\n    if (iter % 100 == 0):\n        print >> sys.stderr, network_name + '_iter_' + iterations + '_' + str(aveSamples)\n        print >> sys.stderr, \"processed %d / %d images (%d samples/mp3)\" % (iter, cnt, aveSamples)\n        print >> sys.stderr, \"score: \", topcoder_score\n        print >> sys.stderr, \"expected score:\", topcoder_score / processed * 35200\n\nprint >> sys.stderr, \"Final score: \", topcoder_score, \" / \", cnt, \"000\"\nprint >> sys.stderr, \"expected score:\", topcoder_score / processed * 35200\n"
  },
  {
    "path": "test_main_network.py",
    "content": "import sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\n# info about classes\nfile = open('trainingData.csv')\ndata = file.readlines()[1:]\nlangs = set()\nfor line in data:\n    filepath, language = line.split(',')\n    language = language.strip()\n    langs.add(language)\nlangs = sorted(langs)\n\n\n# network parameters:\ndeploy_name = 'main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR'\nnetwork_name = deploy_name + '_150K-momentum'\niterations = '51000'\n\nnet = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',\n                       pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')\n\ntransformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\ntransformer.set_transpose('data', (2, 0, 1))\nnet.blobs['data'].reshape(1, 1, 256, 858)\n\npredict_set = sys.argv[1]\n\nif (predict_set == \"test\"):\n    folder = 'test/png/'\n    f = open('testingData.csv')\n    cnt = 12320\n    print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '.csv', 'w')\nelif (predict_set == \"val\"):\n    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' ## stegh dreci augm\n    f = open('valDataNew.csv')\n    cnt = 16176\n    print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '.csv', 'w')\nelse: # train\n    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'\n    f = open('trainingDataNew.csv')\n    cnt = 10000\n    print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '.csv', 'w')\n    \npreds = []\nlabels = []\ntopcoder_score = 0\nprocessed = 0\n\nfor iter in range(cnt):\n    st = f.readline()\n    if (predict_set == \"val\" or predict_set == \"train\"):\n        (name, label) = st.split(',')\n        label = int(label)\n    else:\n        name = st.strip()[:-4]\n    processed += 1\n    \n    net.blobs['data'].data[...] = transformer.preprocess('data', \n        caffe.io.load_image(folder + name + '.png', color=False))\n    \n    out = net.forward()['loss'][0]\n\n    pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)\n    \n    if (predict_set == \"val\" or predict_set == \"train\"):\n        if (pred[0][1] == label):\n            topcoder_score = topcoder_score + 1000\n        elif (pred[1][1] == label):\n            topcoder_score = topcoder_score + 400\n        elif (pred[2][1] == label): \n            topcoder_score = topcoder_score + 160\n    \n    for i in range(3):\n        lang_id = pred[i][1]\n        lang = langs[lang_id]\n        print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\\n')\n\n    if (iter % 100 == 0):\n        print >> sys.stderr, \"processed %d / %d images\" % (iter, cnt)\n        print >> sys.stderr, \"score: \", topcoder_score\n        print >> sys.stderr, \"expected score:\", topcoder_score / processed * 35200\n\nprint >> sys.stderr, \"Final score: \", topcoder_score, \" / \", cnt, \"000\"\nprint >> sys.stderr, \"expected score:\", topcoder_score / processed * 35200\n"
  },
  {
    "path": "theano/README.md",
    "content": "# Spoken language identification\n\n`networks` folder contains multiple CNN and/or RNN models implemented in Theano/Lasagne.\n\nRead more in the corresponding [blog post](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/).\n"
  },
  {
    "path": "theano/main.py",
    "content": "import sys\nimport numpy as np\nimport sklearn.metrics as metrics\nimport argparse\nimport time\nimport json\nimport importlib\n\nprint \"==> parsing input arguments\"\nparser = argparse.ArgumentParser()\n\n# TODO: add argument to choose training set\nparser.add_argument('--network', type=str, default=\"network_batch\", help='embeding size (50, 100, 200, 300 only)')\nparser.add_argument('--epochs', type=int, default=500, help='number of epochs to train')\nparser.add_argument('--load_state', type=str, default=\"\", help='state file path')\nparser.add_argument('--mode', type=str, default=\"train\", help='mode: train/test/test_on_train')\nparser.add_argument('--batch_size', type=int, default=32, help='no commment')\nparser.add_argument('--l2', type=float, default=0, help='L2 regularization')\nparser.add_argument('--log_every', type=int, default=100, help='print information every x iteration')\nparser.add_argument('--save_every', type=int, default=50000, help='save state every x iteration')\nparser.add_argument('--prefix', type=str, default=\"\", help='optional prefix of network name')\nparser.add_argument('--dropout', type=float, default=0.0, help='dropout rate (between 0 and 1)')\nparser.add_argument('--no-batch_norm', dest=\"batch_norm\", action='store_false', help='batch normalization')\nparser.add_argument('--rnn_num_units', type=int, default=500, help='number of hidden units if the network is RNN')\nparser.add_argument('--equal_split', type=bool, default=False, help='use trainEqual.csv and valEqual.csv')\nparser.add_argument('--forward_cnt', type=int, default=1, help='if forward pass is nondeterministic, then how many forward passes are averaged')\n\nparser.set_defaults(batch_norm=True)\nargs = parser.parse_args()\nprint args\n\nif (args.equal_split):\n    train_listfile = open(\"/mnt/hdd615/Hrayr/Spoken-language-identification/trainEqual.csv\", \"r\")\n    test_listfile = open(\"/mnt/hdd615/Hrayr/Spoken-language-identification/valEqual.csv\", \"r\")\nelse:\n    train_listfile = open(\"/mnt/hdd615/Hrayr/Spoken-language-identification/trainingDataNew.csv\", \"r\")\n    test_listfile = open(\"/mnt/hdd615/Hrayr/Spoken-language-identification/valDataNew.csv\", \"r\")\n\ntrain_list_raw = train_listfile.readlines()\ntest_list_raw = test_listfile.readlines()\n\nprint \"==> %d training examples\" % len(train_list_raw)\nprint \"==> %d validation examples\" % len(test_list_raw)\n\ntrain_listfile.close()\ntest_listfile.close()\n\nargs_dict = dict(args._get_kwargs())\nargs_dict['train_list_raw'] = train_list_raw\nargs_dict['test_list_raw'] = test_list_raw\nargs_dict['png_folder'] = \"/mnt/hdd615/Hrayr/Spoken-language-identification/train/png/\"\n    \n\n\nprint \"==> using network %s\" % args.network\nnetwork_module = importlib.import_module(\"networks.\" + args.network)\nnetwork = network_module.Network(**args_dict)\n\n\nnetwork_name = args.prefix + '%s.bs%d%s%s' % (\n    network.say_name(),\n    args.batch_size, \n    \".bn\" if args.batch_norm else \"\", \n    (\".d\" + str(args.dropout)) if args.dropout>0 else \"\")\n    \nprint \"==> network_name:\", network_name\n\n\nstart_epoch = 0\nif args.load_state != \"\":\n    start_epoch = network.load_state(args.load_state) + 1\n\ndef do_epoch(mode, epoch):\n    # mode is 'train' or 'test' or 'predict'\n    y_true = []\n    y_pred = []\n    avg_loss = 0.0\n    prev_time = time.time()\n\n    batches_per_epoch = network.get_batches_per_epoch(mode)\n    all_prediction = []\n\n    for i in range(0, batches_per_epoch):\n        step_data = 
network.step(i, mode)\n        prediction = step_data[\"prediction\"]\n        answers = step_data[\"answers\"]\n        current_loss = step_data[\"current_loss\"]\n        log = step_data[\"log\"]\n        \n        avg_loss += current_loss\n        if (mode == \"predict\" or mode == \"predict_on_train\"):\n            all_prediction.append(prediction)\n            for pass_id in range(args.forward_cnt-1):\n                step_data = network.step(i, mode)\n                prediction += step_data[\"prediction\"]\n                current_loss += step_data[\"current_loss\"]\n            prediction /= args.forward_cnt\n            current_loss /= args.forward_cnt\n            \n        for x in answers:\n            y_true.append(x)\n        \n        for x in prediction.argmax(axis=1):\n            y_pred.append(x)\n        \n        if ((i + 1) % args.log_every == 0):\n            cur_time = time.time()\n            print (\"  %sing: %d.%d / %d \\t loss: %3f \\t avg_loss: %.5f \\t %s \\t time: %.2fs\" % \n                (mode, epoch, (i + 1) * args.batch_size, batches_per_epoch * args.batch_size, \n                 current_loss, avg_loss / (i + 1), log, cur_time - prev_time))\n            prev_time = cur_time\n      \n    \n    #print \"confusion matrix:\"\n    #print metrics.confusion_matrix(y_true, y_pred)\n    accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])\n    print \"accuracy: %.2f percent\" % (accuracy * 100.0 / batches_per_epoch / args.batch_size)\n    \n    if (mode == \"predict\"):\n        all_prediction = np.vstack(all_prediction)\n        pred_filename = \"predictions/\" + (\"equal_split.\" if args.equal_split else \"\") + \\\n                         args.load_state[args.load_state.rfind('/')+1:] + \".csv\"\n        with open(pred_filename, 'w') as pred_csv:\n            for x in all_prediction:\n                print >> pred_csv, \",\".join([(\"%.6f\" % prob) for prob in x])\n                    \n    return avg_loss / batches_per_epoch\n\n\nif args.mode == 'train':\n    print \"==> training\"   \t\n    for epoch in range(start_epoch, args.epochs):\n        do_epoch('train', epoch)\n        test_loss = do_epoch('test', epoch)\n        state_name = 'states/%s.epoch%d.test%.5f.state' % (network_name, epoch, test_loss)\n        print \"==> saving ... %s\" % state_name\n        network.save_params(state_name, epoch)\n        \nelif args.mode == 'test':\n    do_epoch('predict', 0)\nelif args.mode == 'test_on_train':\n    do_epoch('predict_on_train', 0)\nelse:\n    raise Exception(\"unknown mode\")"
  },
  {
    "path": "theano/networks/__init__.py",
    "content": ""
  },
  {
    "path": "theano/networks/base_network.py",
    "content": "import cPickle as pickle\n\n\nclass BaseNetwork:\n\t\n\tdef say_name(self):\n\t\treturn \"unknown\"\n\t\n\t\n\tdef save_params(self, file_name, epoch, **kwargs):\n\t\twith open(file_name, 'w') as save_file:\n\t\t\tpickle.dump(\n\t\t\t\tobj = {\n\t\t\t\t\t'params' : [x.get_value() for x in self.params],\n\t\t\t\t\t'epoch' : epoch, \n\t\t\t\t},\n\t\t\t\tfile = save_file,\n\t\t\t\tprotocol = -1\n\t\t\t)\n\t\n\t\n\tdef load_state(self, file_name):\n\t\tprint \"==> loading state %s\" % file_name\n\t\tepoch = 0\n\t\twith open(file_name, 'r') as load_file:\n\t\t\tdict = pickle.load(load_file)\n\t\t\tloaded_params = dict['params']\n\t\t\tfor (x, y) in zip(self.params, loaded_params):\n\t\t\t\tx.set_value(y)\n\t\t\tepoch = dict['epoch']\n\t\treturn epoch\n\n\n\tdef get_batches_per_epoch(self, mode):\n\t\tif (mode == 'train' or mode == 'predict_on_train'):\n\t\t\treturn len(self.train_list_raw) / self.batch_size\n\t\telif (mode == 'test' or mode == 'predict'):\n\t\t\treturn len(self.test_list_raw) / self.batch_size\n\t\telse:\n\t\t\traise Exception(\"unknown mode\")\n\t\n\t\n\tdef step(self, batch_index, mode):\n\t\t\n\t\tif (mode == \"train\"):\n\t\t\tdata, answers = self.read_batch(self.train_list_raw, batch_index)\n\t\t\ttheano_fn = self.train_fn\n\t\telif (mode == \"test\" or mode == \"predict\"):\n\t\t\tdata, answers = self.read_batch(self.test_list_raw, batch_index)\n\t\t\ttheano_fn = self.test_fn\n\t\telif (mode == \"predict_on_train\"):\n\t\t\tdata, answers = self.read_batch(self.train_list_raw, batch_index)\n\t\t\ttheano_fn = self.test_fn\n\t\telse:\n\t\t\traise Exception(\"unrecognized mode\")\n\t\t\n\t\tret = theano_fn(data, answers)\n\t\treturn {\"prediction\": ret[0],\n\t\t\t\t\"answers\": answers,\n\t\t\t\t\"current_loss\": ret[1],\n\t\t\t\t\"log\": \"\",\n\t\t\t\t}"
  },
  {
    "path": "theano/networks/rnn.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.l2 = l2\n        self.mode = mode\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor3('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n\n        # InputLayer       \n        network = layers.InputLayer(shape=(None, 858, 256), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                    lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0005)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"rnn.GRU.num_units%d\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = 
data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    \n"
  },
  {
    "path": "theano/networks/rnn_2layers.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.l2 = l2\n        self.mode = mode\n        self.num_units = rnn_num_units\n        self.batch_norm = batch_norm\n        \n        self.input_var = T.tensor3('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        # scale inputs to be in [-1, 1]\n        input_var_norm = 2 * self.input_var - 1\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n\n        # InputLayer       \n        network = layers.InputLayer(shape=(None, 858, 256), input_var=input_var_norm)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n            print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                    lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                             
          outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"rnn_2layers.GRU.num_units%d\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/rnn_2layers_5khz.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):\n        \n        print \"==> not used params in network class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.l2 = l2\n        self.mode = mode\n        self.num_units = rnn_num_units\n        self.batch_norm = batch_norm\n        \n        self.input_var = T.tensor3('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        # scale inputs to be in [-1, 1]\n        input_var_norm = 2 * self.input_var - 1\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 858, 128), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n\n        # InputLayer       \n        network = layers.InputLayer(shape=(None, 858, 128), input_var=input_var_norm)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n            print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # BatchNormalization Layer\n        # There are some states, where this layer was disabled\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n            print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                    lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n  
                                          updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"rnn_2layers_5khz.GRU.num_units%d\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 858, 128), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)[:, :128]\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/tc_net.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 5\n        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 6\n        network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # DENSE 1\n        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \"\"\"\n        # DENSE 2\n        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \"\"\"\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                    lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def 
say_name(self):\n        return \"tc_net\"\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/tc_net_deeprnn_shared_pad.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels  = 32 \n        filter_W = 54\n        filter_H = 8\n        \n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        \n        channels = []\n        for channel_index in range(num_channels):\n            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        W_in_to_updategate = None\n        W_hid_to_updategate = None\n        b_updategate = None\n        W_in_to_resetgate = None\n        W_hid_to_resetgate = None\n        b_resetgate = None\n        W_in_to_hidden_update = None\n        W_hid_to_hidden_update = None\n        b_hidden_update = None\n        \n        W_in_to_updategate1 = None\n        W_hid_to_updategate1 = None\n        b_updategate1 = None\n        W_in_to_resetgate1 = None\n        W_hid_to_resetgate1 = None\n        b_resetgate1 = None\n        W_in_to_hidden_update1 = None\n        W_hid_to_hidden_update1 = None\n        b_hidden_update1 = None\n        \n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer       \n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            if (channel_index == 0):\n                # GRULayer\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False)\n                W_in_to_updategate = network.W_in_to_updategate\n                W_hid_to_updategate = network.W_hid_to_updategate\n                b_updategate = network.b_updategate\n                W_in_to_resetgate = network.W_in_to_resetgate\n                W_hid_to_resetgate = network.W_hid_to_resetgate\n                b_resetgate = network.b_resetgate\n                W_in_to_hidden_update = network.W_in_to_hidden_update\n                W_hid_to_hidden_update = network.W_hid_to_hidden_update\n                b_hidden_update = network.b_hidden_update\n                \n                # BatchNormalization Layer\n                if (self.batch_norm):\n                    network = layers.BatchNormLayer(incoming=network)\n                \n                # GRULayer\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n                W_in_to_updategate1 = network.W_in_to_updategate\n                W_hid_to_updategate1 = network.W_hid_to_updategate\n                b_updategate1 = network.b_updategate\n                W_in_to_resetgate1 = network.W_in_to_resetgate\n                W_hid_to_resetgate1 = network.W_hid_to_resetgate\n                b_resetgate1 = network.b_resetgate\n                W_in_to_hidden_update1 = network.W_in_to_hidden_update\n                W_hid_to_hidden_update1 = network.W_hid_to_hidden_update\n                b_hidden_update1 = network.b_hidden_update\n                        \n                # add params \n                self.params += layers.get_all_params(network, trainable=True)\n\n            else:\n                # GRULayer, but shared\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False,\n                            
resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),\n                            updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),\n                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))\n                            \n                # BatchNormalization Layer\n                if (self.batch_norm):\n                    network = layers.BatchNormLayer(incoming=network)\n                    \n                # GRULayer, but shared\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,\n                            resetgate=layers.Gate(W_in=W_in_to_resetgate1, W_hid=W_hid_to_resetgate1, b=b_resetgate1),\n                            updategate=layers.Gate(W_in=W_in_to_updategate1, W_hid=W_hid_to_updategate1, b=b_updategate1),\n                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update1, W_hid=W_hid_to_hidden_update1, b=b_hidden_update1))\n                \n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        # Dropout Layer\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n    \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_deeprnn.4conv.pad.GRU.shared.num_units%d.5khz\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n  
      end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/tc_net_mod.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        # NOTE: replace pad=2 with ignore_border=False\n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 5\n        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 6\n        network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # DENSE 1\n        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n    \n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                    lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_mod\"\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, 
end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n                \n"
  },
  {
    "path": "theano/networks/tc_net_mod_5khz_small.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = 
layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 5\n        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # DENSE 1\n        network = layers.DenseLayer(incoming=network, num_units=256, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n\n                \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n\n        self.params = layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n        self.test_prediction = layers.get_output(network, deterministic=True)\n        \n        print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        def get_loss(prediction):\n            loss_ce = lasagne.objectives.categorical_crossentropy(prediction, self.answer_var).mean()\n            if (self.l2 > 0):\n                loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, \n                                                                        lasagne.regularization.l2)\n            else:\n                loss_l2 = 0\n            return loss_ce + loss_l2\n    \n        self.loss = get_loss(self.prediction)\n        self.test_loss = get_loss(self.test_prediction)\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        # deterministic version\n        #self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n        #                               outputs=[self.test_prediction, self.test_loss])\n    \n        # non deterministic version, as train_fn\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_mod_5khz_small\"\n    \n\n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = 
data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/tc_net_rnn.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels  = 32 \n        filter_W = 104\n        filter_H = 13\n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        \n        channels = []\n        for channel_index in 
range(num_channels):\n            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer       \n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            # GRULayer\n            network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n            \n            # BatchNormalization Layer\n            if (self.batch_norm):\n                network = layers.BatchNormLayer(incoming=network)\n              \n            # add params \n            self.params += layers.get_all_params(network, trainable=True)\n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        # DENSE 1\n        network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n    \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.GRU.3conv.num_units%d.5khz\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for 
i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    \n"
  },
  {
    "path": "theano/networks/tc_net_rnn_nodense.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels  = 32 \n        filter_W = 104\n        filter_H = 13\n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        \n        channels = []\n        for channel_index in range(num_channels):\n 
           channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer       \n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            # GRULayer\n            network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n            \n            # BatchNormalization Layer\n            if (self.batch_norm):\n                network = layers.BatchNormLayer(incoming=network)\n              \n            # add params \n            self.params += layers.get_all_params(network, trainable=True)\n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        \"\"\"\n        # DENSE 1\n        network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \"\"\"\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n    \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.3conv.GRU.num_units%d.nodense.5khz\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n     
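   # each listfile row is \"<name>,<label>\"; load the PNG spectrogram, keep the first 128 frequency rows and scale to [0, 1)\n     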
   for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    "
  },
  {
    "path": "theano/networks/tc_net_rnn_onernn.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        output = output.transpose((0, 3, 1, 2))\n        output = output.flatten(ndim=3)\n        \n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        num_channels = 32 \n        filter_W = 54\n        filter_H = 8\n        \n            \n        # InputLayer       \n        network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0003)\n                \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.4conv.pad.GRU.onernn.num_units%d.5khz\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = 
np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n    "
  },
  {
    "path": "theano/networks/tc_net_rnn_onernn_notimepool.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        
if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        output = output.transpose((0, 3, 1, 2))\n        output = output.flatten(ndim=3)\n        \n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        num_channels = 32 \n        filter_W = 852\n        filter_H = 8\n        \n            \n        # InputLayer       \n        network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # GRULayer\n        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.001)\n                \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.4conv.pad.GRU.onernn.notimepool.num_units%d.5khz\" % self.num_units\n\n\n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 
0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n                \n"
  },
  {
    "path": "theano/networks/tc_net_rnn_shared.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels  = 32 \n        filter_W = 104\n        filter_H = 13\n        # NOTE: these constants are shapes of last pool layer, it can be symbolic \n        # explicit values are better for optimizations\n        \n        channels = []\n        for channel_index in range(num_channels):\n 
           channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        W_in_to_updategate = None\n        W_hid_to_updategate = None\n        b_updategate = None\n        W_in_to_resetgate = None\n        W_hid_to_resetgate = None\n        b_resetgate = None\n        W_in_to_hidden_update = None\n        W_hid_to_hidden_update = None\n        b_hidden_update = None\n        \n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer\n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            if (channel_index == 0):\n                # GRULayer\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n                W_in_to_updategate = network.W_in_to_updategate\n                W_hid_to_updategate = network.W_hid_to_updategate\n                b_updategate = network.b_updategate\n                W_in_to_resetgate = network.W_in_to_resetgate\n                W_hid_to_resetgate = network.W_hid_to_resetgate\n                b_resetgate = network.b_resetgate\n                W_in_to_hidden_update = network.W_in_to_hidden_update\n                W_hid_to_hidden_update = network.W_hid_to_hidden_update\n                b_hidden_update = network.b_hidden_update\n                \n                # add params\n                self.params += layers.get_all_params(network, trainable=True)\n\n            else:\n                # GRULayer sharing the weights of channel 0; nonlinearity=tanh is passed\n                # explicitly because Gate defaults to sigmoid, while GRULayer's default\n                # hidden_update (used for channel 0) is tanh\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,\n                            resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),\n                            updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),\n                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update, nonlinearity=tanh))\n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n    \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n       
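 # optimizer: momentum SGD with lr 0.003; an adadelta variant is kept below, commented out\n       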
 #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.3conv.GRU.shared.num_units%d.5khz\" % self.num_units\n    \n\n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    "
  },
  {
    "path": "theano/networks/tc_net_rnn_shared_pad.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels = 32\n        filter_W = 54\n        filter_H = 8\n        \n        # NOTE: these constants are the shape of the last pool layer; they could be\n        # computed symbolically, but explicit values are better for optimization\n        \n        channels = []\n        for channel_index in range(num_channels):\n            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        W_in_to_updategate = None\n        W_hid_to_updategate = None\n        b_updategate = None\n        W_in_to_resetgate = None\n        W_hid_to_resetgate = None\n        b_resetgate = None\n        W_in_to_hidden_update = None\n        W_hid_to_hidden_update = None\n        b_hidden_update = None\n        \n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer\n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            if (channel_index == 0):\n                # GRULayer\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n                W_in_to_updategate = network.W_in_to_updategate\n                W_hid_to_updategate = network.W_hid_to_updategate\n                b_updategate = network.b_updategate\n                W_in_to_resetgate = network.W_in_to_resetgate\n                W_hid_to_resetgate = network.W_hid_to_resetgate\n                b_resetgate = network.b_resetgate\n                W_in_to_hidden_update = network.W_in_to_hidden_update\n                W_hid_to_hidden_update = network.W_hid_to_hidden_update\n                b_hidden_update = network.b_hidden_update\n                \n                # add params\n                self.params += layers.get_all_params(network, trainable=True)\n\n            else:\n                # GRULayer sharing the weights of channel 0; nonlinearity=tanh is passed\n                # explicitly because Gate defaults to sigmoid, while GRULayer's default\n                # hidden_update (used for channel 0) is tanh\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,\n                            resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),\n                            updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),\n                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update, nonlinearity=tanh))\n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        # Dropout Layer\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        
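# only the head parameters (batch norm + dense) are collected here; the conv and shared GRU parameters were already added above\n        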
\n    \n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz\" % self.num_units\n    \n    \n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    \n"
  },
  {
    "path": "theano/networks/tc_net_rnn_shared_pad_augm.py",
    "content": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfrom lasagne.nonlinearities import rectify, softmax, sigmoid, tanh\n\nimport PIL.Image as Image\nfrom base_network import BaseNetwork\n\nfloatX = theano.config.floatX\n\n\nclass Network(BaseNetwork):\n    \n    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):\n        \n        print \"==> not used params in DMN class:\", kwargs.keys()\n        self.train_list_raw = train_list_raw\n        self.test_list_raw = test_list_raw\n        self.png_folder = png_folder\n        self.batch_size = batch_size\n        self.dropout = dropout\n        self.l2 = l2\n        self.mode = mode\n        self.batch_norm = batch_norm\n        self.num_units = rnn_num_units\n        \n        self.input_var = T.tensor4('input_var')\n        self.answer_var = T.ivector('answer_var')\n        \n        print \"==> building network\"\n        example = np.random.uniform(size=(self.batch_size, 1, 128, 768), low=0.0, high=1.0).astype(np.float32) #########\n        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########\n       \n        network = layers.InputLayer(shape=(None, 1, 128, 768), input_var=self.input_var)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        \n        # CONV-RELU-POOL 1\n        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 2\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n\n        \n        # CONV-RELU-POOL 3\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # CONV-RELU-POOL 4\n        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), \n                                     stride=1, nonlinearity=rectify)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n        if 
(self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        self.params = layers.get_all_params(network, trainable=True)\n        \n        output = layers.get_output(network)\n        num_channels = 32\n        filter_W = 48\n        filter_H = 8\n        \n        # NOTE: these constants are the shape of the last pool layer; they could be\n        # computed symbolically, but explicit values are better for optimization\n        \n        channels = []\n        for channel_index in range(num_channels):\n            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))\n        \n        rnn_network_outputs = []\n        W_in_to_updategate = None\n        W_hid_to_updategate = None\n        b_updategate = None\n        W_in_to_resetgate = None\n        W_hid_to_resetgate = None\n        b_resetgate = None\n        W_in_to_hidden_update = None\n        W_hid_to_hidden_update = None\n        b_hidden_update = None\n        \n        for channel_index in range(num_channels):\n            rnn_input_var = channels[channel_index]\n            \n            # InputLayer\n            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)\n\n            if (channel_index == 0):\n                # GRULayer\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)\n                W_in_to_updategate = network.W_in_to_updategate\n                W_hid_to_updategate = network.W_hid_to_updategate\n                b_updategate = network.b_updategate\n                W_in_to_resetgate = network.W_in_to_resetgate\n                W_hid_to_resetgate = network.W_hid_to_resetgate\n                b_resetgate = network.b_resetgate\n                W_in_to_hidden_update = network.W_in_to_hidden_update\n                W_hid_to_hidden_update = network.W_hid_to_hidden_update\n                b_hidden_update = network.b_hidden_update\n                \n                # add params\n                self.params += layers.get_all_params(network, trainable=True)\n\n            else:\n                # GRULayer sharing the weights of channel 0; nonlinearity=tanh is passed\n                # explicitly because Gate defaults to sigmoid, while GRULayer's default\n                # hidden_update (used for channel 0) is tanh\n                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,\n                            resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),\n                            updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),\n                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update, nonlinearity=tanh))\n            \n            rnn_network_outputs.append(layers.get_output(network))\n        \n        all_output_var = T.concatenate(rnn_network_outputs, axis=1)\n        print all_output_var.eval({self.input_var:example}).shape\n        \n        # InputLayer\n        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)\n        \n        # Dropout Layer\n        if (self.dropout > 0):\n            network = layers.dropout(network, self.dropout)\n        \n        # BatchNormalization Layer\n        if (self.batch_norm):\n            network = layers.BatchNormLayer(incoming=network)\n        \n        # Last layer: classification\n        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)\n        print layers.get_output(network).eval({self.input_var:example}).shape\n    
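    # NOTE: inputs here are 768 frames wide (read_batch below crops a random 768-frame window), giving filter_W = 48 after the four pools\n    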
\n        self.params += layers.get_all_params(network, trainable=True)\n        self.prediction = layers.get_output(network)\n    \n        #print \"==> param shapes\", [x.eval().shape for x in self.params]\n        \n        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()\n        if (self.l2 > 0):\n            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, \n                                                                          lasagne.regularization.l2)\n        else:\n            self.loss_l2 = 0\n        self.loss = self.loss_ce + self.loss_l2\n        \n        #updates = lasagne.updates.adadelta(self.loss, self.params)\n        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)\n        \n        if self.mode == 'train':\n            print \"==> compiling train_fn\"\n            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], \n                                            outputs=[self.prediction, self.loss],\n                                            updates=updates)\n        \n        print \"==> compiling test_fn\"\n        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],\n                                       outputs=[self.prediction, self.loss])\n    \n    \n    def say_name(self):\n        return \"tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz.augm\" % self.num_units\n    \n\n    def read_batch(self, data_raw, batch_index):\n\n        start_index = batch_index * self.batch_size\n        end_index = start_index + self.batch_size\n        \n        data = np.zeros((self.batch_size, 1, 128, 768), dtype=np.float32)\n        answers = []\n        \n        for i in range(start_index, end_index):\n            answers.append(int(data_raw[i].split(',')[1]))\n            name = data_raw[i].split(',')[0]\n            path = self.png_folder + name + \".png\"\n            im = Image.open(path)\n            offset = random.randint(0, 90)\n            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, offset:offset+768] / 256.0\n\n        answers = np.array(answers, dtype=np.int32)\n        return data, answers\n    "
  },
  {
    "path": "theano/plot.py",
    "content": "import numpy as np\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport sys\nimport argparse\nimport os\n\n\n#parsing arguments\nparser = argparse.ArgumentParser()\nparser.add_argument('--plot', type=str, default='plot.png', help='plotfile name with .png')\nparser.add_argument('--log', type=str, default='log.txt', help='log file name')\nparser.add_argument('--winVal', type=int, default='200', help='window for Val')\nparser.add_argument('--winTrain', type=int, default='200', help='window for Train')\nparser.add_argument('--no-legend', dest='legend', action='store_false')\nparser.add_argument('--no-accuracy', dest='accuracy', action='store_false')\nparser.add_argument('--no-loss', dest='loss', action='store_false')\nparser.add_argument('--start_epoch', type=float, default=-1.0, help='start plotting from that epoch')\nparser.set_defaults(loss=True)\nparser.set_defaults(legend=True)\nparser.set_defaults(accuracy=True)\n\nargs = parser.parse_args()\n\nplotname = args.plot\nwindowVal = args.winVal\nwindowTrain = args.winTrain\naccuracy = []\n\n\ndef movingAverage(loss, window):\n    mas = []\n    for i in range(len(loss)):\n        j = i - window + 1\n        if (j < 0):\n            j = 0\n        sum = 0.0\n        for k in range(window):\n            sum += loss[j + k]\n        mas.append(sum / window)\n    return mas\n\n\ndef plotTrainVal(filename, index, plotLabel):\n    valx = []\n    valy = []\n    trainx = []\n    trainy = []\n    train_accuracyx = []\n    train_accuracyy = []\n    val_accuracyx = []\n    val_accuracyy = []\n    \n    with open(filename, 'r') as logfile: \n        for st in logfile.readlines():\n            head = st.split('\\t')[0].strip()\n\n            if (head[:7] == 'testing' or head[:8] == 'training'):\n                iteration_expr = head[head.find(':')+1:]\n                divpos = iteration_expr.find('/')\n                first = iteration_expr[:divpos]\n                iterations_per_epoch = float(iteration_expr[divpos+1:])\n                dotpos = first.find('.')\n                epoch = float(first[:dotpos])\n                iteration = float(first[dotpos+1:])\n                x = epoch + iteration / iterations_per_epoch\n                \n                st_loss = st[st.find(\"avg_loss\"):]\n                cur_loss = float(st_loss[st_loss.find(':')+1:st_loss.find('\\t')])\n                \n                if (head[:7] == 'testing'):\n                    valx.append(x)\n                    valy.append(cur_loss)\n                else:\n                    trainx.append(x)\n                    trainy.append(cur_loss)\n            \n            if st.strip()[:8] == \"accuracy\":\n                cur_accuracy = float(st[st.find(':')+1:st.find(\"percent\")]) / 100.0\n                if (len(train_accuracyx) > len(val_accuracyx)):\n                    val_accuracyx.append(valx[-1])\n                    val_accuracyy.append(cur_accuracy)\n                else:\n                    train_accuracyx.append(trainx[-1])\n                    train_accuracyy.append(cur_accuracy)\n\n    while(len(valx) > 0 and valx[0] < args.start_epoch):\n        valx = valx[1:]\n        valy = valy[1:]\n\n    while(len(trainx) > 0 and trainx[0] < args.start_epoch):\n        trainx = trainx[1:]\n        trainy = trainy[1:]\n\n\n    #window config\n    wndVal = min(windowVal, int(0.8 * len(valy)))\n    wndTrain = min(windowTrain, int(0.8 * len(trainy)))\n    \n    print \"Train length: \", len(trainy), \" \\t\\t window: \", wndTrain\n    
print \"Val length: \", len(valy), \" \\t\\t window: \", wndVal\n    \n    #movAvg and correcting length\n    #valy = movingAverage(valy, wndVal)\n    #trainy = movingAverage(trainy, wndTrain)\n    #valx = valx[:len(valy)]\n    #trainx = trainx[:len(trainy)]\n    \n\n    #plotting\n    greenDiff = 50\n    redBlueDiff = 50\n    \n    if (args.loss):\n        plt.plot(trainx, trainy, '#00' + hex(index * greenDiff)[2:] \n                + hex(256 - index * redBlueDiff)[2:],\n                label=plotLabel + \" train\")\n        plt.hold(True)\n\n        plt.plot(valx, valy, '#' + hex(256 - index * redBlueDiff)[2:] \n                + hex(index * greenDiff)[2:] + '00',\n                label=plotLabel + \" validation\")\n        plt.hold(True)\n    \n    if (args.accuracy):\n        plt.plot(train_accuracyx, train_accuracyy, '#000000',\n                label=plotLabel + \" train_accuracy\")\n        plt.hold(True)\n\n        plt.plot(val_accuracyx, val_accuracyy, '#00FF00',\n                label=plotLabel + \" val_accuracy\")\n        plt.hold(True)\n                \n    print \"plot index =\", index\n    for (x, y) in zip(val_accuracyx, val_accuracyy):\n        print \"\\tepoch = %.0f, accuracy = %f\" % (x - 1, y)\n    print '\\tMax: %f // Epoch: %d' % (max(val_accuracyy), val_accuracyx[val_accuracyy.index(max(val_accuracyy))])\n\n\nplotTrainVal(args.log, 1, args.log)\n\n\nif (args.legend):\n    plt.legend(loc='upper right', fontsize='x-small')\nplt.gcf().savefig(plotname)\n\n"
  }
]