Repository: uber-common/differentiable-plasticity Branch: master Commit: 5bd29a18cc20 Files: 106 Total size: 747.0 KB Directory structure: gitextract_rz2xt4mk/ ├── .gitignore ├── LICENSE ├── NOTICE.md ├── README.md ├── awd-lstm-lm/ │ ├── .gitignore │ ├── LICENSE │ ├── OpusHdfsCopy.py │ ├── OpusPrepare.sh │ ├── README.md │ ├── TESTCOMMAND │ ├── data.py │ ├── embed_regularize.py │ ├── finetune.py │ ├── generate.py │ ├── getdata.sh │ ├── locked_dropout.py │ ├── main.py │ ├── model.py │ ├── model.py.old │ ├── mylstm.py │ ├── mylstm.py.orig │ ├── opus.docker.old │ ├── plotresults.py │ ├── plotresultssingle.py │ ├── pointer.py │ ├── request_devbox.json │ ├── request_full.json │ ├── request_opus.json │ ├── request_opus.json.old │ ├── request_plast.json │ ├── splitcross.py │ ├── test.py │ ├── tmp.py │ ├── utils.py │ └── weight_drop.py ├── images/ │ ├── OpusHdfsCopy.py │ ├── README.md │ ├── anim.py │ ├── images.py │ ├── plotresults.py │ ├── request.json │ ├── showcompletion_eta.py │ └── testpics.py ├── maze/ │ ├── OpusHdfsCopy.py │ ├── README.md │ ├── anim.py │ ├── animbatch.py │ ├── batch.py │ ├── makefigure.py │ ├── makemaze.py │ ├── maze.py │ ├── opus.docker │ ├── opus.docker.old │ ├── plotfigure.py │ ├── plotresults.py │ ├── request.json │ ├── request_devbox.json │ ├── request_modplast.json │ ├── request_modul.json │ ├── request_plastic.json │ ├── request_rnn.json │ ├── request_rnn100neurons.json │ ├── testbatch.py │ └── testnobatch.py ├── omniglot/ │ ├── .ipynb_checkpoints/ │ │ └── Omniglot Data Loading-checkpoint.ipynb │ ├── README.md │ ├── omniglot.py │ ├── opus.docker │ ├── plotresults.py │ ├── request.json │ └── test_omniglot_allseeds.py ├── opus.docker ├── request_devbox.json ├── request_lstm.json ├── request_lstm_simple.json ├── simple/ │ ├── .gitignore │ ├── OpusHdfsCopy.py │ ├── README.md │ ├── full.py │ ├── lstm.py │ ├── opus.docker │ ├── plotresults.py │ ├── request.json │ ├── request_lstm.json │ ├── simple.py │ └── simplest.py ├── simplemaze/ │ ├── README.md 
│ └── maze.py └── sr/ ├── .gitignore ├── OpusHdfsCopy.py ├── README.md ├── anim.py ├── cueshown0.dat.npy ├── makefigure.py ├── modul.py ├── modulator0.dat.npy ├── opus.docker.old ├── plotmodulator.py ├── plotresults.py ├── request.json ├── request_batch.json ├── request_easy.json ├── rewardsprevstep0.dat.npy ├── srbatch.py ├── srrun.py └── srrun1episode.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ */*.data */data */*.pt */*.swp */*.txt */*.png */*.dat */tmp *.swp *.txt *.png *.gif *.dat loss* grads_* __pycache__/* */__pycache__/* */__pycache__/ torchmod* params* tmp*/ ================================================ FILE: LICENSE ================================================ "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by the text below. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under this License. This License governs use of the accompanying Work, and your use of the Work constitutes acceptance of this License. You may use this Work for any non-commercial purpose, subject to the restrictions in this License. Some purposes which can be non-commercial are teaching, academic research, and personal experimentation. You may also distribute this Work with books or other teaching materials, or publish the Work on websites, that are intended to teach the use of the Work. You may not use or distribute this Work, or any derivative works, outputs, or results from the Work, in any form for commercial purposes. Non-exhaustive examples of commercial purposes would be running business operations, licensing, leasing, or selling the Work, or distributing the Work for use with commercial products. You may modify this Work and distribute the modified Work for non-commercial purposes, however, you may not grant rights to the Work or derivative works that are broader than or in conflict with those provided by this License. For example, you may not distribute modifications of the Work under terms that would permit commercial use, or under terms that purport to require the Work or derivative works to be sublicensed to others. In return, we require that you agree: 1. Not to remove any copyright or other notices from the Work. 2. That if you distribute the Work in Source or Object form, you will include a verbatim copy of this License. 3. That if you distribute derivative works of the Work in Source form, you do so only under a license that includes all of the provisions of this License and is not in conflict with this License, and if you distribute derivative works of the Work solely in Object form you do so only under a license that complies with this License. 4. 
That if you have modified the Work or created derivative works from the Work, and distribute such modifications or derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Work. Such notices must state: (i) that you have changed the Work; and (ii) the date of any changes. 5. If you publicly use the Work or any output or result of the Work, you will provide a notice with such use that provides any person who uses, views, accesses, interacts with, or is otherwise exposed to the Work (i) with information of the nature of the Work, (ii) with a link to the Work, and (iii) a notice that the Work is available under this License. 6. THAT THE WORK COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 7. THAT NEITHER UBER TECHNOLOGIES, INC. NOR ANY OF ITS AFFILIATES, SUPPLIERS, SUCCESSORS, NOR ASSIGNS WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE WORK OR THIS LICENSE, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE WORK OR DERIVATIVE WORKS. 8. That if you sue anyone over patents that you think may apply to the Work or anyone's use of the Work, your license to the Work ends automatically. 9. That your rights under the License end automatically if you breach it in any way. 10. Uber Technologies, Inc. reserves all rights not expressly granted to you in this License. 
================================================ FILE: NOTICE.md ================================================ The `awd-lstm-lm` directory (language modelling with plastic LSTMs) was forked from the [Salesforce Language Model Toolkit](https://github.com/salesforce/awd-lstm-lm/), which implements the baseline language modelling system used in our experiments (this baseline is the model described in [Merity et al. (2017), Regularizing and Optimizing LSTM Language Models](https://arxiv.org/abs/1708.02182). License for the Salesforce Language Model Toolkit: Copyright (c) 2017, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ ## Differentiable plasticity This repo contains implementations of the algorithms described in [Differentiable plasticity: training plastic networks with gradient descent](https://arxiv.org/abs/1804.02464), a research paper from Uber AI Labs. NOTE: please see also our more recent work on differentiable *neuromodulated* plasticity: the "[backpropamine](https://github.com/uber-research/backpropamine)" framework. There are four different experiments included here: - `simple`: Binary pattern memorization and completion. Read this one first! - `images`: Natural image memorization and completion - `omniglot`: One-shot learning in the Omniglot task - `maze`: Maze exploration task (reinforcement learning) We strongly recommend studying the `simple/simplest.py` program first, as it is deliberately kept as simple as possible while showing full-fledged differentiable plasticity learning. The code requires Python 3 and PyTorch 0.3.0 or later. The `images` code also requires scikit-learn. By default our code requires a GPU, but most programs can be run on CPU by simply uncommenting the relevant lines (for others, remove all occurrences of `.cuda()`). To comment, please open an issue. We will not be accepting pull requests but encourage further study of this research. 
To learn more, check out our accompanying article on the [Uber Engineering Blog](https://eng.uber.com/differentiable-plasticity). ## Copyright and licensing information Copyright (c) 2018-2019 Uber Technologies, Inc. All code is licensed under the Uber Non-Commercial License (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at the root directory of this project. See the LICENSE file in this repository for the specific language governing permissions and limitations under the License. ================================================ FILE: awd-lstm-lm/.gitignore ================================================ maintmp.py HDFS/ *.patch model_* results_* *.pt *.swp __pycache__/ data/ corpus* ================================================ FILE: awd-lstm-lm/LICENSE ================================================ BSD 3-Clause License Copyright (c) 2017, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: awd-lstm-lm/OpusHdfsCopy.py ================================================ import os import os.path def checkHdfs(): return os.path.isfile('/opt/hadoop/latest/bin/hdfs') def transferFileToHdfsPath(sourcepath, targetpath): hdfspath = targetpath targetdir = os.path.dirname(targetpath) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) def transferFileToHdfsDir(sourcepath, targetdir): hdfspath = os.path.join(targetdir, os.path.basename(sourcepath)) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) ================================================ FILE: awd-lstm-lm/OpusPrepare.sh ================================================ cd /home/work # $HOME is not the same as ~ !!!! 
# Installing pyenv and putting it in the path curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash echo "HOME is $HOME" echo 'export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" eval "$(pyenv virtualenv-init -)" ' > $HOME/.bashrc # Installing python 3.5 and making it default source $HOME/.bashrc pyenv install 3.5.2 pyenv local 3.5.2 # Note: when we exit the script, environments go away and we need to re-source ~/.bashrc and re-run pyenv local 3.5.2 # Installing numpy and PyTorch pip install numpy==1.14 pip install torch apt-get install unzip # Some machines seem not to have it? # Downloading the data sh ./getdata.sh ================================================ FILE: awd-lstm-lm/README.md ================================================ # LSTMs with neuromodulated plasticity This code implements language modelling on the Penn Treebank dataset, using LSTMs with neuromodulated plasticity ("backpropamine"), as described in [Backpropamine: training self-modifying neural networks with differentiable neuromodulated plasticity (Miconi et al., ICLR 2016)](https://openreview.net/forum?id=r1lrAiA5Ym), a paper from Uber AI labs. The code is forked from [Salesforce Language model toolkit](https://github.com/Smerity/awd-lstm-lm) and uses most of their parameters and design choices. The main differences are that we do not implement DropConnect and reduce batch size to 6 for computational reasons. This code requires Python 3 and PyTorch 1.0. To comment, please open an issue. Note that the code is provided "as is": we cannot provide support or accept pull requests at this time. ## Usage Before running this code, run `getdata.sh` to obtain the Penn Treebank data. 
Plasticity and neuromodulation: `python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype modplasth2mod --modulout fanout --nhid 1149 --alphatype perneuron --asgdtime 125 --agdiv 1149` Plasticity without neuromodulation: `python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype none --modulout none --nhid 1149 --alphatype perneuron --asgdtime 125 --agdiv 1149` No plasticity, just plain LSTM: `python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model MYLSTM --modultype modplasth2mod --modulout fanout --nhid 1150 --alphatype full --asgdtime 125 --agdiv 1150` Note that in all of the above, we use per-neuron plasticity coefficients and reduce the number of neurons in plastic LSTMs (`nhid`) to ensure that plastic LSTMs do not have more trainable parameters. ## Code organization. The main program is `main.py`. There is some interface code in `model.py`. The code for actual plastic LSTMs is in `mylstm.py`. ## Plastic LSTMs The code for plastic LSTMs is relatively straightforward, as can be seen in `mylstm.py`. However, note that in `main.py` we selectively reduce the gradient for `alpha` parameters when using plastic LSTMs with either per-neuron or single `alpha`. More precisely, we divide the gradient on `alpha` coefficients by a value that should be roughly equal to the number of neurons in the LSTM. This greatly enhances stability without forcing a reduction in learning rates. 
================================================ FILE: awd-lstm-lm/TESTCOMMAND ================================================ python test.py --model MYLSTM --nhid 1150 --file ./HDFS/ptb/model__SqUsq_MYLSTM_clip_cv2.0_modplasth2mod_fanout_i2c_perneuron_asgdtime125_agdiv1150_lr30_3l_1150h_0.5lstm_rngseed1.dat ================================================ FILE: awd-lstm-lm/data.py ================================================ import os import torch from collections import Counter class Dictionary(object): def __init__(self): self.word2idx = {} self.idx2word = [] self.counter = Counter() self.total = 0 def add_word(self, word): if word not in self.word2idx: self.idx2word.append(word) self.word2idx[word] = len(self.idx2word) - 1 token_id = self.word2idx[word] self.counter[token_id] += 1 self.total += 1 return self.word2idx[word] def __len__(self): return len(self.idx2word) class Corpus(object): def __init__(self, path): self.dictionary = Dictionary() self.train = self.tokenize(os.path.join(path, 'train.txt')) self.valid = self.tokenize(os.path.join(path, 'valid.txt')) self.test = self.tokenize(os.path.join(path, 'test.txt')) def tokenize(self, path): """Tokenizes a text file.""" assert os.path.exists(path) # Add words to the dictionary with open(path, 'r') as f: tokens = 0 for line in f: words = line.split() + [''] tokens += len(words) for word in words: self.dictionary.add_word(word) # Tokenize file content with open(path, 'r') as f: ids = torch.LongTensor(tokens) token = 0 for line in f: words = line.split() + [''] for word in words: ids[token] = self.dictionary.word2idx[word] token += 1 return ids ================================================ FILE: awd-lstm-lm/embed_regularize.py ================================================ import numpy as np import pdb import torch def embedded_dropout(embed, words, dropout=0.1, scale=None): if dropout: mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / 
(1 - dropout) masked_embed_weight = mask * embed.weight else: masked_embed_weight = embed.weight if scale: masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight padding_idx = embed.padding_idx if padding_idx is None: padding_idx = -1 X = torch.nn.functional.embedding(words, masked_embed_weight, padding_idx, embed.max_norm, embed.norm_type, embed.scale_grad_by_freq, embed.sparse ) return X if __name__ == '__main__': V = 50 h = 4 bptt = 10 batch_size = 2 embed = torch.nn.Embedding(V, h) words = np.random.random_integers(low=0, high=V-1, size=(batch_size, bptt)) words = torch.LongTensor(words) origX = embed(words) X = embedded_dropout(embed, words) print(origX) print(X) ================================================ FILE: awd-lstm-lm/finetune.py ================================================ import argparse import time import math import numpy as np np.random.seed(331) import torch import torch.nn as nn import data import model from utils import batchify, get_batch, repackage_hidden parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model') parser.add_argument('--data', type=str, default='data/penn/', help='location of the data corpus') parser.add_argument('--model', type=str, default='LSTM', help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)') parser.add_argument('--emsize', type=int, default=400, help='size of word embeddings') parser.add_argument('--nhid', type=int, default=1150, help='number of hidden units per layer') parser.add_argument('--nlayers', type=int, default=3, help='number of layers') parser.add_argument('--lr', type=float, default=30, help='initial learning rate') parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping') parser.add_argument('--epochs', type=int, default=8000, help='upper epoch limit') parser.add_argument('--batch_size', type=int, default=80, metavar='N', help='batch size') parser.add_argument('--bptt', type=int, default=70, help='sequence 
length') parser.add_argument('--dropout', type=float, default=0.4, help='dropout applied to layers (0 = no dropout)') parser.add_argument('--dropouth', type=float, default=0.3, help='dropout for rnn layers (0 = no dropout)') parser.add_argument('--dropouti', type=float, default=0.65, help='dropout for input embedding layers (0 = no dropout)') parser.add_argument('--dropoute', type=float, default=0.1, help='dropout to remove words from embedding layer (0 = no dropout)') parser.add_argument('--wdrop', type=float, default=0.5, help='amount of weight dropout to apply to the RNN hidden to hidden matrix') parser.add_argument('--tied', action='store_false', help='tie the word embedding and softmax weights') parser.add_argument('--seed', type=int, default=1111, help='random seed') parser.add_argument('--nonmono', type=int, default=5, help='random seed') parser.add_argument('--cuda', action='store_false', help='use CUDA') parser.add_argument('--log-interval', type=int, default=200, metavar='N', help='report interval') randomhash = ''.join(str(time.time()).split('.')) parser.add_argument('--save', type=str, default=randomhash+'.pt', help='path to save the final model') parser.add_argument('--alpha', type=float, default=2, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)') parser.add_argument('--beta', type=float, default=1, help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)') parser.add_argument('--wdecay', type=float, default=1.2e-6, help='weight decay applied to all weights') args = parser.parse_args() # Set the random seed manually for reproducibility. 
torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") else: torch.cuda.manual_seed(args.seed) ############################################################################### # Load data ############################################################################### corpus = data.Corpus(args.data) eval_batch_size = 10 test_batch_size = 1 train_data = batchify(corpus.train, args.batch_size, args) val_data = batchify(corpus.valid, eval_batch_size, args) test_data = batchify(corpus.test, test_batch_size, args) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied) if args.cuda: model.cuda() total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in model.parameters()) print('Args:', args) print('Model total parameters:', total_params) criterion = nn.CrossEntropyLoss() ############################################################################### # Training code ############################################################################### def evaluate(data_source, batch_size=10): # Turn on evaluation mode which disables dropout. 
if args.model == 'QRNN': model.reset() model.eval() total_loss = 0 ntokens = len(corpus.dictionary) hidden = model.init_hidden(batch_size) for i in range(0, data_source.size(0) - 1, args.bptt): data, targets = get_batch(data_source, i, args, evaluation=True) output, hidden = model(data, hidden) output_flat = output.view(-1, ntokens) total_loss += len(data) * criterion(output_flat, targets).data hidden = repackage_hidden(hidden) return total_loss[0] / len(data_source) def train(): # Turn on training mode which enables dropout. if args.model == 'QRNN': model.reset() total_loss = 0 start_time = time.time() ntokens = len(corpus.dictionary) hidden = model.init_hidden(args.batch_size) batch, i = 0, 0 while i < train_data.size(0) - 1 - 1: bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2. # Prevent excessively small or negative sequence lengths seq_len = max(5, int(np.random.normal(bptt, 5))) # There's a very small chance that it could select a very long sequence length resulting in OOM seq_len = min(seq_len, args.bptt + 10) lr2 = optimizer.param_groups[0]['lr'] optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt model.train() data, targets = get_batch(train_data, i, args, seq_len=seq_len) # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. hidden = repackage_hidden(hidden) optimizer.zero_grad() output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True) raw_loss = criterion(output.view(-1, ntokens), targets) loss = raw_loss # Activiation Regularization loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:]) # Temporal Activation Regularization (slowness) loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:]) loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. 
torch.nn.utils.clip_grad_norm(model.parameters(), args.clip) optimizer.step() total_loss += raw_loss.data optimizer.param_groups[0]['lr'] = lr2 if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss[0] / args.log_interval elapsed = time.time() - start_time print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | ' 'loss {:5.2f} | ppl {:8.2f}'.format( epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'], elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))) total_loss = 0 start_time = time.time() ### batch += 1 i += seq_len # Load the best saved model. with open(args.save, 'rb') as f: model = torch.load(f) # Loop over epochs. lr = args.lr stored_loss = evaluate(val_data) best_val_loss = [] # At any point you can hit Ctrl + C to break out of training early. try: #optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, weight_decay=args.wdecay) optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay) for epoch in range(1, args.epochs+1): epoch_start_time = time.time() train() if 't0' in optimizer.param_groups[0]: tmp = {} for prm in model.parameters(): tmp[prm] = prm.data.clone() prm.data = optimizer.state[prm]['ax'].clone() val_loss2 = evaluate(val_data) print('-' * 89) print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), val_loss2, math.exp(val_loss2))) print('-' * 89) if val_loss2 < stored_loss: with open(args.save, 'wb') as f: torch.save(model, f) print('Saving Averaged!') stored_loss = val_loss2 for prm in model.parameters(): prm.data = tmp[prm].clone() if (len(best_val_loss)>args.nonmono and val_loss2 > min(best_val_loss[:-args.nonmono])): print('Done!') import sys sys.exit(1) optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay) #optimizer.param_groups[0]['lr'] /= 2. 
best_val_loss.append(val_loss2) except KeyboardInterrupt: print('-' * 89) print('Exiting from training early') # Load the best saved model. with open(args.save, 'rb') as f: model = torch.load(f) # Run on test data. test_loss = evaluate(test_data, test_batch_size) print('=' * 89) print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( test_loss, math.exp(test_loss))) print('=' * 89) ================================================ FILE: awd-lstm-lm/generate.py ================================================ ############################################################################### # Language Modeling on Penn Tree Bank # # This file generates new sentences sampled from the language model # ############################################################################### import argparse import torch from torch.autograd import Variable import data parser = argparse.ArgumentParser(description='PyTorch PTB Language Model') # Model parameters. parser.add_argument('--data', type=str, default='./data/penn', help='location of the data corpus') parser.add_argument('--model', type=str, default='LSTM', help='type of recurrent net (LSTM, QRNN)') parser.add_argument('--checkpoint', type=str, default='./model.pt', help='model checkpoint to use') parser.add_argument('--outf', type=str, default='generated.txt', help='output file for generated text') parser.add_argument('--words', type=int, default='1000', help='number of words to generate') parser.add_argument('--seed', type=int, default=1111, help='random seed') parser.add_argument('--cuda', action='store_true', help='use CUDA') parser.add_argument('--temperature', type=float, default=1.0, help='temperature - higher will increase diversity') parser.add_argument('--log-interval', type=int, default=100, help='reporting interval') args = parser.parse_args() # Set the random seed manually for reproducibility. 
torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") else: torch.cuda.manual_seed(args.seed) if args.temperature < 1e-3: parser.error("--temperature has to be greater or equal 1e-3") with open(args.checkpoint, 'rb') as f: model = torch.load(f) model.eval() if args.model == 'QRNN': model.reset() if args.cuda: model.cuda() else: model.cpu() corpus = data.Corpus(args.data) ntokens = len(corpus.dictionary) hidden = model.init_hidden(1) input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True) if args.cuda: input.data = input.data.cuda() with open(args.outf, 'w') as outf: for i in range(args.words): output, hidden = model(input, hidden) word_weights = output.squeeze().data.div(args.temperature).exp().cpu() word_idx = torch.multinomial(word_weights, 1)[0] input.data.fill_(word_idx) word = corpus.dictionary.idx2word[word_idx] outf.write(word + ('\n' if i % 20 == 19 else ' ')) if i % args.log_interval == 0: print('| Generated {}/{} words'.format(i, args.words)) ================================================ FILE: awd-lstm-lm/getdata.sh ================================================ echo "=== Acquiring datasets ===" echo "---" mkdir -p save mkdir -p data cd data #echo "- Downloading WikiText-2 (WT2)" #wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip #unzip -q wikitext-2-v1.zip #cd wikitext-2 #mv wiki.train.tokens train.txt #mv wiki.valid.tokens valid.txt #mv wiki.test.tokens test.txt #cd .. # #echo "- Downloading WikiText-103 (WT2)" #wget --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip #unzip -q wikitext-103-v1.zip #cd wikitext-103 #mv wiki.train.tokens train.txt #mv wiki.valid.tokens valid.txt #mv wiki.test.tokens test.txt #cd .. 
# #echo "- Downloading enwik8 (Character)" #mkdir -p enwik8 #cd enwik8 #wget --continue http://mattmahoney.net/dc/enwik8.zip #python prep_enwik8.py #cd .. echo "- Downloading Penn Treebank (PTB)" wget --quiet --continue http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz tar -xzf simple-examples.tgz mkdir -p penn cd penn mv ../simple-examples/data/ptb.train.txt train.txt mv ../simple-examples/data/ptb.test.txt test.txt mv ../simple-examples/data/ptb.valid.txt valid.txt cd .. #echo "- Downloading Penn Treebank (Character)" #mkdir -p pennchar #cd pennchar #mv ../simple-examples/data/ptb.char.train.txt train.txt #mv ../simple-examples/data/ptb.char.test.txt test.txt #mv ../simple-examples/data/ptb.char.valid.txt valid.txt #cd .. # rm -rf simple-examples/ # echo "- Downloading WikiText-2 (WT2)" # wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip # unzip -q wikitext-2-v1.zip # cd wikitext-2 # mv wiki.train.tokens train.txt # mv wiki.valid.tokens valid.txt # mv wiki.test.tokens test.txt # echo "---" echo "Happy language modeling :)" ================================================ FILE: awd-lstm-lm/locked_dropout.py ================================================ import torch import torch.nn as nn from torch.autograd import Variable class LockedDropout(nn.Module): def __init__(self): super().__init__() def forward(self, x, dropout=0.5): if not self.training or not dropout: return x m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout) mask = Variable(m, requires_grad=False) / (1 - dropout) mask = mask.expand_as(x) return mask * x ================================================ FILE: awd-lstm-lm/main.py ================================================ import OpusHdfsCopy from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs import argparse import time import math import numpy as np import torch import torch.nn as nn from torch.autograd import Variable import pdb import data import model from utils 
# (continuation of the `from utils ...` statement split at the extraction
#  boundary; the full statement is
#  `from utils import batchify, get_batch, repackage_hidden`)
import batchify, get_batch, repackage_hidden

# ---- Command-line configuration ----
parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')

# Data and architecture selection.
parser.add_argument('--data', type=str, default='data/penn/',
                    help='location of the data corpus')
parser.add_argument('--model', type=str, default='PLASTICLSTM',
                    help='type of recurrent net (LSTM, QRNN, GRU, PLASTICLSTM, MYLSTM, FASTPLASTICLSTM, SIMPLEPLASTICLSTM)')

# Plasticity / neuromodulation configuration (forwarded to mylstm.py via
# the `myparams` dict built further down).
parser.add_argument('--alphatype', type=str, default='full',
                    help="type of alpha matrix: (full, perneuron, single)")
parser.add_argument('--modultype', type=str, default='none',
                    help="type of modulation: (none, modplasth2mod, modplastc2mod)")
parser.add_argument('--modulout', type=str, default='single',
                    help="modulatory output (single or fanout)")
parser.add_argument('--cliptype', type=str, default='clip',
                    help="clip type (decay, clip, aditya)")
parser.add_argument('--hebboutput', type=str, default='i2c',
                    help='output used for hebbian computations (i2c, h2co, cell, hidden)')

# Network sizes.
parser.add_argument('--emsize', type=int, default=400,
                    help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=1150,
                    help='number of hidden units per layer')
parser.add_argument('--nlayers', type=int, default=3,
                    help='number of layers')
parser.add_argument('--clipval', type=float, default=2.0,
                    help='value of the hebbian trace clipping')

# Optimization hyperparameters.
parser.add_argument('--lr', type=float, default=30,
                    help='initial learning rate')
parser.add_argument('--agdiv', type=float, default=1150.0,
                    help='divider of the gradient of alpha')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
parser.add_argument('--epochs', type=int, default=300,
                    help='upper epoch limit')
parser.add_argument('--batch_size', type=int, default=80, metavar='N',
                    help='batch size')
parser.add_argument('--bptt', type=int, default=70,
                    help='sequence length')

# Dropout family (AWD-LSTM regularization).
parser.add_argument('--dropout', type=float, default=0.4,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropouth', type=float, default=0.3,
                    help='dropout for rnn layers (0 = no dropout)')
parser.add_argument('--dropouti', type=float, default=0.65,
                    help='dropout for input embedding layers (0 = no dropout)')
parser.add_argument('--dropoute', type=float, default=0.1,
                    help='dropout to remove words from embedding layer (0 = no dropout)')
parser.add_argument('--proplstm', type=float, default=0.5,
                    help='for split-lstms: proportion of LSTM cells in the recurrent layer')
parser.add_argument('--wdrop', type=float, default=0.5,
                    help='amount of weight dropout to apply to the RNN hidden to hidden matrix')

parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--asgdtime', type=int, default=-1,
                    help='number of iterations before switch to ASGD (if positive)')
parser.add_argument('--nonmono', type=int, default=5,
                    help='range of non monotonicity before switch to ASGD (if asgdtime is negative)')
# NOTE: action='store_false' means args.cuda defaults to True and passing
# --cuda actually TURNS CUDA OFF, despite the help string.
parser.add_argument('--cuda', action='store_false',
                    help='use CUDA')
parser.add_argument('--numgpu', type=int, default=0,
                    help='which GPU to use? (no effect if GPU not used at all)')
parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                    help='report interval')
# Default save path is derived from the current wall-clock time, so each run
# gets a (practically) unique filename.
randomhash = ''.join(str(time.time()).split('.'))
parser.add_argument('--save', type=str, default=randomhash+'.pt',
                    help='path to save the final model')
parser.add_argument('--alpha', type=float, default=2,
                    help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
parser.add_argument('--beta', type=float, default=1,
                    help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)')
parser.add_argument('--wdecay', type=float, default=1.2e-6,
                    help='weight decay applied to all weights')
parser.add_argument('--resume', type=str, default='',
                    help='path of model to resume')
parser.add_argument('--optimizer', type=str, default='sgd',
                    help='optimizer to use (sgd, adam)')
parser.add_argument('--when', nargs="+", type=int, default=[-1],
                    help='When (which epochs) to divide the learning rate by 10 - accepts multiple')
args = parser.parse_args()
# Weight tying between encoder and decoder is always on in this script.
args.tied = True

# Set the random seed manually for reproducibility.
# Seed numpy and torch (and CUDA, if it will actually be used).
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda :
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)
else:
    print("NOTE: no CUDA device detected.")

import platform
print("PyTorch version:", torch.__version__, "Numpy version:", np.version.version, "Python version:", platform.python_version(), "GPU used (if any):", args.numgpu)

###############################################################################
# Load data
###############################################################################

def model_save(fn):
    # Pickles the whole [model, criterion, optimizer] triple so training can
    # be resumed exactly (see model_load / --resume).
    with open(fn, 'wb') as f:
        torch.save([model, criterion, optimizer], f)

def model_load(fn):
    # Restores the triple saved by model_save into the module-level globals.
    global model, criterion, optimizer
    with open(fn, 'rb') as f:
        model, criterion, optimizer = torch.load(f)

import os
import hashlib
# Cache the tokenized corpus on disk, keyed by a hash of the data path.
fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
if os.path.exists(fn):
    print('Loading cached dataset...')
    corpus = torch.load(fn)
else:
    print('Producing dataset...')
    corpus = data.Corpus(args.data)
    torch.save(corpus, fn)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)
#train_data = train_data[:5000,:] # For debugging

###############################################################################
# Build the model
###############################################################################

from splitcross import SplitCrossEntropyLoss
criterion = None

ntokens = len(corpus.dictionary)

# Configuration parameters of the plastic LSTM. See mylstm.py for details.
myparams={}
myparams['clipval'] = args.clipval
myparams['cliptype'] = args.cliptype
myparams['modultype'] = args.modultype
myparams['modulout'] = args.modulout
myparams['hebboutput'] = args.hebboutput
myparams['alphatype'] = args.alphatype

# Build a run-identifying suffix from all the experiment settings; it is
# embedded in the model / results filenames below.
suffix = '_SqUsq_'+args.model+'_'+myparams['cliptype']+'_cv'+str(myparams['clipval'])+'_'+myparams['modultype']+'_'+myparams['modulout']+'_'+myparams['hebboutput']+'_'+myparams['alphatype']+'_asgdtime'+str(args.asgdtime)+'_agdiv'+str(int(args.agdiv))+'_lr'+str(args.lr)+'_'+str(args.nlayers)+'l_'+str(args.nhid)+'h_'+str(args.proplstm)+'lstm_rngseed'+str(args.seed)
print("Suffix:", suffix)
MODELFILENAME = 'model_'+suffix+'.dat'
RESULTSFILENAME = 'results_'+suffix+'.txt'
FILENAMESTOSAVE = [MODELFILENAME, RESULTSFILENAME]  # We will append to this list the additional files at each learning rate reduction, if any
print("Plasticity and neuromodulation parameters:", myparams)

# NOTE: this rebinds the module name `model` (imported above) to the model
# INSTANCE; the module is no longer reachable after this line.
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.proplstm, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied, myparams)
###
if args.resume:
    print('Resuming model ...')
    model_load(args.resume)
    optimizer.param_groups[0]['lr'] = args.lr
    model.dropouti, model.dropouth, model.dropout, args.dropoute = args.dropouti, args.dropouth, args.dropout, args.dropoute
    if args.wdrop:
        from weight_drop import WeightDrop
        for rnn in model.rnns:
            if type(rnn) == WeightDrop: rnn.dropout = args.wdrop
            elif rnn.zoneout > 0: rnn.zoneout = args.wdrop
###
if not criterion:
    splits = []
    if ntokens > 500000:
        # One Billion
        # This produces fairly even matrix mults for the buckets:
        # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
        splits = [4200, 35000, 180000]
    elif ntokens > 75000:
        # WikiText-103
        splits = [2800, 20000, 76000]
    print('Using', splits)
    criterion = SplitCrossEntropyLoss(args.emsize, splits=splits, verbose=False)
###
params = list(model.parameters()) + list(criterion.parameters())
# (statement continues in the next chunk: `model = model.cuda(args.numgpu)`)
if args.cuda:
    model =
model.cuda(args.numgpu)  # (completes `model = model.cuda(...)` begun in the previous chunk)
    criterion = criterion.cuda(args.numgpu)
    # Rebuild the flat parameter list after the .cuda() moves.
    params = list(model.parameters()) + list(criterion.parameters())
###
#total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size()) # Smerity version, doesn't work when size==3
total_params = sum(x.numel() for x in params if x.numel())
print('Args:', args)
print('Model total parameters:', total_params)

###############################################################################
# Training code
###############################################################################

def evaluate(data_source, batch_size=10):
    """Return the mean per-token validation/test loss over `data_source`.

    Relies on the module-level globals `model`, `criterion`, `corpus`, `args`.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        if args.model == 'QRNN': model.reset()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)
            # Weight each chunk's mean loss by its length so the final
            # division yields a per-token average.
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
            hidden = repackage_hidden(hidden)
        #return total_loss[0] / len(data_source) # Error under modern PyTorch
        return total_loss / len(data_source)

def train():
    """One full epoch of training over `train_data` (uses module globals)."""
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        # Randomized BPTT length (AWD-LSTM trick): usually around args.bptt,
        # occasionally half of it.
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # NOTE: this was commented out in smerity's code!
        seq_len = min(seq_len, args.bptt + 10)

        # Rescale the learning rate proportionally to the sampled sequence
        # length (restored at the bottom of the loop body, next chunk).
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        # NOTE: Now 'hidden' includes the Hebbian traces if using plasticity.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activiation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # When using plastic LSTMs,
        # We divide the gradient on the alphas by the number of inputs, i.e.
        # the number of recurrent neurons, but only if plasticity is
        # 'perneuron' or 'single' (as opposed to 'full').
        # This is necessary to preserve stability while using the same learning rate as Merity et al.
        if args.model == 'PLASTICLSTM' or args.model == 'SPLITLSTM' or args.model == 'FASTPLASTICLSTM':
            if args.alphatype == 'perneuron' or args.alphatype == 'single':
                # Based on other experiments, this is actually not good for full-plasticity
                for x in model.rnns:
                    if hasattr(x.alpha.grad, 'data'):
                        x.alpha.grad.data /= args.agdiv

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        # NOTE(review): torch.nn.utils.clip_grad_norm was renamed
        # clip_grad_norm_ in later PyTorch; this spelling is deprecated there.
        if args.clip: torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        # OPTIMIZATION STEP
        optimizer.step()

        total_loss += raw_loss.data
        # Restore the learning rate rescaled at the top of the loop body.
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs.
lr = args.lr  # NOTE(review): appears unused below; args.lr is used directly.
best_val_loss = []
stored_loss = 100000000  # sentinel "best validation loss so far"

# At any point you can hit Ctrl + C to break out of training early.
try:
    optimizer = None
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wdecay)
    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wdecay)
    allvallosses = []
    for epoch in range(1, args.epochs+1):
        epoch_start_time = time.time()
        train()
        if 't0' in optimizer.param_groups[0]:  # Are we in the ASGD regime?
            # Temporarily swap the model weights for the ASGD running
            # averages ('ax'), evaluate, then swap back.
            tmp = {}
            for prm in model.parameters():
                tmp[prm] = prm.data.clone()
                # NOTE (TM): the following line may cause trouble after the switch to ASGD if some declared pytorch Parameters of the network are not actually used in the computational graph
                prm.data = optimizer.state[prm]['ax'].clone()

            val_loss2 = evaluate(val_data, eval_batch_size)
            print('-' * 89)
            # NOTE(review): val_loss here is the value from the last
            # pre-ASGD epoch (assigned in the else-branch); it is not
            # recomputed in this branch.
            print('| end of epoch {:3d} (t0 on) | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f} | valloss2 ppl {:8.2f}'.format(
                    epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss), math.exp(val_loss2)))
            print('-' * 89)

            if val_loss2 < stored_loss:
                model_save(MODELFILENAME)
                print('Saving Averaged!')
                stored_loss = val_loss2

            # Restore the raw (non-averaged) weights for further training.
            for prm in model.parameters():
                prm.data = tmp[prm].clone()
            allvallosses.append(val_loss2)

        else:
            val_loss = evaluate(val_data, eval_batch_size)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
              epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss), val_loss / math.log(2)))
            print('-' * 89)

            if val_loss < stored_loss:
                model_save(MODELFILENAME)
                print('Saving model (new best validation)')
                stored_loss = val_loss

            # Switch SGD -> ASGD either after a fixed number of epochs
            # (asgdtime > 0) or when validation loss stops improving over a
            # window of `nonmono` epochs (asgdtime < 0).
            if args.optimizer == 'sgd' and 't0' not in optimizer.param_groups[0]:
                if (args.asgdtime < 0 and len(best_val_loss)>args.nonmono and val_loss > min(best_val_loss[:-args.nonmono])) or (args.asgdtime > 0 and len(best_val_loss) == args.asgdtime) :
                    print('Switching to ASGD')
                    optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay)

            if epoch in args.when:
                print('Saving model before learning rate decreased')
                EPOCHFILENAME = '{}.e{}'.format(MODELFILENAME, epoch)
                model_save(EPOCHFILENAME)
                FILENAMESTOSAVE.append(EPOCHFILENAME)
                print('Dividing learning rate by 10')
                optimizer.param_groups[0]['lr'] /= 10.

            best_val_loss.append(val_loss)
            allvallosses.append(val_loss)

        # Dump the validation-loss history after every epoch.
        np.savetxt(RESULTSFILENAME, allvallosses)

        # Saving files remotely.... (Uber only!)
        # Best-effort copy of checkpoints/results to shared NFS storage,
        # only when that mount point exists (internal infrastructure).
        if os.path.isdir('/mnt/share/tmiconi'):
            print("Transferring to NFS storage...")
            # NOTE(review): `fn` reuses the corpus-cache variable name from
            # above, and `result` (the os.system exit code) is never checked.
            for fn in FILENAMESTOSAVE:
                result = os.system('cp {} {}'.format(fn, '/mnt/share/tmiconi/ptb/'+fn))
            print("Done!")
        #if checkHdfs():
        #    print("Transfering to HDFS...")
        #    for fn in FILENAMESTOSAVE:
        #        transferFileToHdfsDir(fn, '/ailabs/tmiconi/ptb/')

except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
model_load(MODELFILENAME)

# Run on test data.
test_loss = evaluate(test_data, test_batch_size)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'.format(
    test_loss, math.exp(test_loss), test_loss / math.log(2)))
print('=' * 89)

================================================ FILE: awd-lstm-lm/model.py ================================================

import torch
import torch.nn as nn
#from torch.autograd import Variable
from embed_regularize import embedded_dropout
from locked_dropout import LockedDropout
from weight_drop import WeightDrop
import random, pdb
import mylstm

class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    # NOTE(review): params={} is a mutable default argument; safe only
    # because it is never mutated here.
    def __init__(self, rnn_type, ntoken, ninp, nhid, proplstm, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, params={}):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'MYLSTM', 'MYFASTLSTM', 'SIMPLEPLASTICLSTM', 'FASTPLASTICLSTM', 'PLASTICLSTM', 'SPLITLSTM'], 'RNN type is not supported'
        # One single-layer RNN per stacked layer; the LAST layer's width is
        # ninp when weights are tied (so decoder/encoder shapes match).
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            #for rr in self.rnns:
            #    rr.flatten_parameters()
            if wdrop:
                print("Using WeightDrop!")
                # (list comprehension continues in the next chunk)
                self.rnns = [WeightDrop(rnn,
['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'MYLSTM':
            self.rnns = [mylstm.MyLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid)) for l in range(nlayers)]
        elif rnn_type == 'MYFASTLSTM':
            self.rnns = [mylstm.MyFastLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid)) for l in range(nlayers)]
        elif rnn_type == 'PLASTICLSTM':
            self.rnns = [mylstm.PlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
        elif rnn_type == 'SIMPLEPLASTICLSTM':
            # Note that this one ignores the 'params' argument, which is only kept to preserve identical signature with PlasticLSTM
            self.rnns = [mylstm.SimplePlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
        elif rnn_type == 'FASTPLASTICLSTM':
            self.rnns = [mylstm.MyFastPlasticLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), params) for l in range(nlayers)]
        elif rnn_type == 'SPLITLSTM':
            # Not used
            self.rnns = [mylstm.SplitLSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), proplstm, params) for l in range(nlayers)]
        elif rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.proplstm = proplstm
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

    def reset(self):
        # Only QRNN layers carry internal state that needs explicit resetting.
        if self.rnn_type == 'QRNN': [r.reset() for r in self.rnns]

    def init_weights(self):
        # Uniform init for the embedding and decoder; zero decoder bias.
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden, return_h=False):
        # Word-level embedding dropout (only while training), then locked
        # dropout on the embedded inputs.
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            # Each rnn is a layer!
            # each raw_output has shape seq_len x batch_size x nb_hidden
            # new_h is a tuple of 2 elements, each of size 1 x batch_size x nb_hidden (last h and last c)
            if self.rnn_type != 'MYLSTM' and self.rnn_type != 'MYFASTLSTM' and self.rnn_type != 'SIMPLEPLASTICLSTM' and self.rnn_type != 'PLASTICLSTM' and self.rnn_type != 'FASTPLASTICLSTM' and self.rnn_type != 'SPLITLSTM':
                # Built-in cuDNN RNNs consume the whole sequence at once.
                raw_output, new_h = rnn(raw_output, hidden[l])
            else:
                # Custom cells are stepped manually, one timestep at a time
                # (continues in the next chunk).
                single_h = hidden[l]  # actually a tuple, includes the h and the c (and for plastic LTMS, includes Hebb as third element!)
                singleouts = []
                # Step the custom cell over the time dimension manually.
                for z in range(raw_output.shape[0]):
                    singleout, single_h = rnn(raw_output[z], single_h)
                    #if z==0:
                    #    print("RANDOM NUMBER 1:",float(torch.rand(1)))
                    singleouts.append(singleout)
                new_h = single_h  # the last (h,c[,hebb]) after the sequence is processed
                raw_output = torch.stack(singleouts)
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                # lockdrop will zero out some output units over the whole sequence (separately chosen for each batch, but fixed across sequence)
                #pdb.set_trace()
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        #pdb.set_trace()
        hidden = new_hidden
        #pdb.set_trace()

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        # Flatten (seq_len, batch, feat) -> (seq_len*batch, feat) for the loss.
        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden

    def init_hidden(self, bsz):
        # Zero-initialize per-layer state; the tuple layout depends on the
        # cell type (plastic variants carry Hebbian traces as extra entries).
        weight = next(self.parameters()).data
        if self.rnn_type == 'MYLSTM' or self.rnn_type == 'MYFASTLSTM':
            return [((weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()))
                for l in range(self.nlayers)]
        elif self.rnn_type == 'PLASTICLSTM' or self.rnn_type == 'SIMPLEPLASTICLSTM':
            return [(
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),  # h state
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),  # c state
                (weight.new(bsz, self.rnns[l].w.shape[0], self.rnns[l].w.shape[1]).zero_())  # hebbian trace for the recurrent weights
                #(weight.new(bsz, self.rnns[l].isize, self.rnns[l].hsize).zero_())  # hebbian trace for the input weights (not necessarily used)
                ) for l in range(self.nlayers)]
        elif self.rnn_type == 'FASTPLASTICLSTM':
            return [(
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),  # h state
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),  # c state
                (weight.new(bsz, self.rnns[l].hsize, self.rnns[l].hsize).zero_())  # hebbian trace of recurrent weights
                #(weight.new(bsz, self.rnns[l].isize, self.rnns[l].hsize).zero_())  # hebbian trace for the input weights (not necessarily used)
                #(weight.new(bsz, self.rnns[l].w.shape[0], self.rnns[l].w.shape[1]).zero_()),  # hebbian trace for the recurrent weights
                #(weight.new(bsz, self.rnns[l].win.shape[0], self.rnns[l].win.shape[1]).zero_())  # hebbian trace for the input weights (not necessarily used)
                ) for l in range(self.nlayers)]
        elif self.rnn_type == 'SPLITLSTM':
            return [(
                (weight.new(bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),  # H state
                (weight.new(bsz, self.rnns[l].lsize ).zero_()),  # C state
                (weight.new(bsz, self.rnns[l].w.shape[0], self.rnns[l].w.shape[1]).zero_()),  # hebb
                (weight.new(bsz, self.rnns[l].win.shape[0], self.rnns[l].win.shape[1]).zero_())  # hebbin
                ) for l in range(self.nlayers)]
        elif self.rnn_type == 'LSTM' :
            return [((weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),
                (weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()))
                for l in range(self.nlayers)]
        elif self.rnn_type == 'QRNN' or self.rnn_type == 'GRU':
            return [(weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()) for l in range(self.nlayers)]

================================================ FILE: awd-lstm-lm/model.py.old ================================================

# Legacy copy of model.py (pre-plasticity); kept for reference only.
import torch
import torch.nn as nn
from torch.autograd import Variable
from embed_regularize import embedded_dropout
from locked_dropout import LockedDropout
# (statement continues in the next chunk: `from weight_drop import WeightDrop`)
from
weight_drop import WeightDrop

# Legacy RNNModel (original Merity et al. AWD-LSTM container), superseded by
# the plasticity-aware version in model.py above.
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

    def reset(self):
        if self.rnn_type == 'QRNN': [r.reset() for r in self.rnns]

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return [(Variable(weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()),
                Variable(weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()))
                for l in range(self.nlayers)]
        elif self.rnn_type == 'QRNN' or self.rnn_type == 'GRU':
            # (return expression continues in the next chunk)
            return
[Variable(weight.new(1, bsz, self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)).zero_()) for l in range(self.nlayers)]

================================================ FILE: awd-lstm-lm/mylstm.py ================================================

# Plastic LSTMs, with neuromodulation (backpropamine),
# as described in Miconi et al. ICLR 2019,
# by Thomas Miconi and Aditya Rawal.
#
# Copyright (c) 2018-2019 Uber Technologies, Inc.
#
# Licensed under the Uber Non-Commercial License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at the root directory of this project.

import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import pdb

# SimplePlasticLSTM is a full-fledged implementation of Plastic LSTMs that uses
# default settings and is not parametrizable beyond input size and hidden size.
# This allows for simpler code and easier understanding. See "PlasticLSTM"
# below for a more customizable version.
class SimplePlasticLSTM(nn.Module):
    """LSTM cell whose recurrent input-to-cell connection (Uc/Whg) is plastic:
    its effective weight is w + alpha * hebb, where hebb is a per-batch
    Hebbian trace updated at every step and gated by a learned neuromodulator."""

    def __init__(self, isize, hsize, params):
        # Note that 'params' is ignored for this class; we keep it to preserve the constructor's signature
        super(SimplePlasticLSTM, self).__init__()

        self.softmax= torch.nn.functional.softmax
        # NOTE(review): F.tanh / F.sigmoid are deprecated aliases in modern
        # PyTorch (torch.tanh / torch.sigmoid); kept as-is here.
        self.activ = F.tanh

        # Plastic connection trainable parameters, i.e. w and alpha:
        self.w = torch.nn.Parameter(.02 * torch.rand(hsize, hsize) - .01)
        self.alpha = torch.nn.Parameter(.0001 * torch.rand(1,1,hsize))  # One alpha per neuron (all incoming connections to a neuron share same alpha)
        #self.alpha = torch.nn.Parameter(.0001 * torch.ones(1))  # One alpha for the whole network
        #self.alpha = torch.nn.Parameter(.0001 * torch.rand(hsize, hsize))  # One alpha per connection

        # Standard (non-plastic) LSTM gate projections.
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        #self.h2c = torch.nn.Linear(hsize, hsize)  # This (equivalent to Whg in PyTorch LSTM docs / Uc in Wikipedia description of LSTM) is replaced by the plastic connection
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)

        # Modulator output (M(t))
        self.h2mod = torch.nn.Linear(hsize, 1)  # Takes input from the h-state, computes the neuromodulator output
        self.modfanout = torch.nn.Linear(1, hsize)  # Projects the network's common neuromodulator output onto each neuron

        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        # hidden is a tuple of h, c and hebb
        # inputs: (batch, isize); h and c: (batch, hsize);
        # hebb: (batch, hsize, hsize).
        hebb = hidden[2]

        # Forget / input / output gates (standard LSTM).
        fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))

        # To implement plasticity, we replace h2c / Whg / Uc with a plastic connection composed of w, alpha and hebb
        # Note that h2c / Whg / Uc is the matrix of weights that takes in the
        # previous time-step h, and whose output (after adding the current input
        # and passing through tanh) is multiplied by the input gates before being
        # added to the cell state
        # Note: Each *column* in w, hebb and alpha constitutes the inputs to a single cell
        # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
        # This is probably not the most elegant way to do it, but it works (remember that there is one alpha per neuron, applied to all input connections of this neuron)
        h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze(1)
        # NOTE(review): x2coutput is computed but unused; x2c is re-applied
        # on the next line.
        x2coutput = self.x2c(inputs)
        inputstocell = F.tanh(self.x2c(inputs) + h2coutput)  # We compute this intermediary state to be used in Hebbian computations below

        # Finally, compute the new cell and hidden states
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputstocell)
        hactiv = torch.mul(opt, F.tanh(cell))

        # Now we need to update the Hebbian traces, including any neuromodulation.
        # Outer product of (pre-synaptic) previous h by (post-synaptic) inputstocell.
        deltahebb = torch.bmm(hidden[0].unsqueeze(2), inputstocell.unsqueeze(1))
        myeta = F.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1
        # The output of the following line has shape BatchSize x 1 x NHidden, i.e. 1 line and NHidden columns for each
        # batch element.
        # When multiplying by deltahebb (BatchSize x NHidden x NHidden), broadcasting will provide a different
        # value for each column but the same value for all rows within each column. This is equivalent to providing
        # the same neuromodulation to all the inputs to a given cell, while letting neuromodulation differ from
        # cell to cell, as required for the fanout concept.
        myeta = self.modfanout(myeta).squeeze().unsqueeze(1)
        # Hard-clip the traces to [-2, 2] so they cannot grow without bound.
        hebb = torch.clamp(hebb + myeta * deltahebb, min=-2.0, max=2.0)

        # Note that "hactiv" (i.e. the new h-state) is duplicated in the return
        # values. This is to maintain the signature used by main.py/model.py (which is from Merity et al.'s code)
        # and is not necessary for other applications.
        hidden = (hactiv, cell, hebb)
        activout = hactiv

        return activout, hidden

# A more customizable version of plastic LSTMs, using parameters passed in the 'params' argument.
class PlasticLSTM(nn.Module):
    """A customizable plastic LSTM cell.

    A standard LSTM in which the recurrent h-to-cell-input weight matrix
    (Whg in the PyTorch LSTM docs / Uc in the Wikipedia description) is
    replaced by a plastic connection w + alpha * hebb, optionally gated by a
    learned neuromodulator M(t). Behaviour is configured via the 'params'
    dict passed to __init__ (see parameter descriptions below).
    """

    def __init__(self, isize, hsize, params):
        super(PlasticLSTM, self).__init__()
        self.softmax = torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = torch.tanh
        # Default values for configuration parameters.
        # BUG FIX: clipval used to default to the *string* '2.0', which made
        # torch.clamp(..., min=-self.clipval, max=self.clipval) raise a
        # TypeError whenever the caller did not supply a numeric 'clipval'.
        # It is now the float 2.0, as the documentation below always claimed.
        self.cliptype, self.modultype, self.hebboutput, self.modulout, self.clipval, self.alphatype = \
            'clip', 'modplasth2mod', 'i2c', 'fanout', 2.0, 'perneuron'
        # Description of the parameters:
        # alphatype: do we have one alpha coefficient for each connection
        #   ('full'), one per neuron ('perneuron' - i.e. all input connections to
        #   a given neuron share the same alpha), or one for the entire network
        #   ('single')?
        # modultype: 'none' (non-modulated plasticity), 'modplasth2mod'
        #   (neuromodulation takes input from the current h-state) or
        #   'modplastc2mod' (neuromodulation takes input from the current c-state).
        # cliptype: 'clip', 'aditya' or 'decay' - specifies how the Hebbian traces should be constrained.
        # clipval: maximum magnitude of the Hebbian trace values (default 2.0)
        # modulout: 'single' (all connections receive the same neuromodulator
        #   output) or 'fanout' (neuromodulator output goes through a 1xN linear layer to reach each neuron)
        # hebboutput: what counts as the "output" in the Hebbian product of input by output. Better to leave it at 'i2c'.
        if 'cliptype' in params:
            self.cliptype = params['cliptype']
        if 'modultype' in params:
            self.modultype = params['modultype']
        if 'hebboutput' in params:
            self.hebboutput = params['hebboutput']
        if 'modulout' in params:
            self.modulout = params['modulout']
        if 'clipval' in params:
            self.clipval = params['clipval']
        if 'alphatype' in params:
            self.alphatype = params['alphatype']
        # Plastic connection trainable parameters, i.e. w and alpha:
        self.w = torch.nn.Parameter(.02 * torch.rand(hsize, hsize) - .01)
        if self.alphatype == 'perneuron':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(1, 1, hsize))
        elif self.alphatype == 'single':
            self.alpha = torch.nn.Parameter(.0001 * torch.ones(1))
        elif self.alphatype == 'full':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(hsize, hsize))
        else:
            # BUG FIX: the message used to be passed as a tuple of arguments;
            # it is now a single formatted string.
            raise ValueError("Must select appropriate alpha type (current incorrect value is: %s)" % str(self.alphatype))
        if self.modultype == 'none':
            # Everyone has the same (trainable) eta. Note: if a parameter is not
            # actually used, there can be problems with ASGD handling in main.py.
            self.eta = torch.nn.Parameter(.01 * torch.ones(1))
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        #self.h2c = torch.nn.Linear(hsize, hsize) # This (equivalent to Whg in PyTorch LSTM docs / Uc in Wikipedia description of LSTM) is replaced by the plastic connection
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        if self.modultype != 'none':
            # This is the layer that computes the neuromodulator output at any
            # time step, based on current hidden state. Although called 'h2mod',
            # it may take input from h or c depending on modultype value.
            self.h2mod = torch.nn.Linear(hsize, 1)
            # Is the modulation just a single scalar, or do we pass it through a
            # 'fanout' weight matrix to get one different value for each target neuron?
            if self.modulout == 'fanout':
                self.modfanout = torch.nn.Linear(1, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        """One time step. 'hidden' is a tuple (h, c, hebb); returns (output, (h, c, hebb))."""
        hebb = hidden[2]
        fgt = torch.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = torch.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = torch.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        # To implement plasticity, we replace h2c / Whg / Uc with a plastic connection composed of w, alpha and hebb.
        # h2c / Whg / Uc is the matrix of weights that takes in the previous
        # time-step h, and whose output (after adding the current input and
        # passing through tanh) is multiplied by the input gates before being
        # added to the cell state.
        # Note: Each *column* in w, hebb and alpha constitutes the inputs to a single cell.
        # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch).
        if self.cliptype == 'aditya':
            # Clipping Hebbian traces a posteriori
            h2coutput = hidden[0].unsqueeze(1).bmm(
                self.w + torch.mul(self.alpha, torch.clamp(hebb, min=-self.clipval, max=self.clipval))).squeeze(1)
        else:
            h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze(1)
        x2coutput = self.x2c(inputs)
        # BUG FIX: x2c was previously applied twice (x2coutput was computed and
        # then thrown away); reuse the single computation.
        inputstocell = torch.tanh(x2coutput + h2coutput)  # Intermediary state, reused in the Hebbian computations below
        # Finally, compute the new cell and hidden states
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputstocell)
        hactiv = torch.mul(opt, torch.tanh(cell))
        # Now we need to compute the updates to the Hebbian traces, including any neuromodulation.
        # For the Hebbian computation, what counts as "output"?
        if self.hebboutput == 'i2c':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), inputstocell.unsqueeze(1))
        elif self.hebboutput == 'h2co':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), h2coutput.unsqueeze(1))
        elif self.hebboutput == 'cell':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), cell.unsqueeze(1))
        elif self.hebboutput == 'hidden':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), hactiv.unsqueeze(1))
        else:
            raise ValueError("Must choose Hebbian target output")
        # What is the source of the neuromodulator computation (if any)?
        if self.modultype == 'none':
            myeta = self.eta
        elif self.modultype == 'modplasth2mod':
            # The neuromodulation takes input from the h-state
            myeta = torch.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1
        elif self.modultype == 'modplastc2mod':
            # The neuromodulation takes input from the c-state
            myeta = torch.tanh(self.h2mod(cell)).unsqueeze(2)
        else:
            raise ValueError("Must choose modulation type")
        # If we use "fanout" neuromodulation, the neuromodulator output is passed
        # through a (trainable) linear layer before hitting the neurons.
        if self.modultype != 'none' and self.modulout == 'fanout':
            # modfanout maps BatchSize x 1 x 1 -> BatchSize x 1 x NHidden, i.e. 1 line
            # and NHidden columns for each batch element. When multiplying by
            # deltahebb (BatchSize x NHidden x NHidden), broadcasting provides a
            # different value for each column but the same value for all rows within
            # each column: the same neuromodulation for all inputs to a given cell,
            # while letting neuromodulation differ from cell to cell (fanout concept).
            # BUG FIX: the old `.squeeze().unsqueeze(1)` round-trip was a no-op for
            # batch size > 1 but corrupted the shape for batch size 1 (squeeze()
            # also dropped the batch dimension). modfanout's output already has
            # the desired BatchSize x 1 x NHidden shape.
            myeta = self.modfanout(myeta)
        # Various possible ways to clip the Hebbian trace
        if self.cliptype == 'decay':
            # Exponential decay
            hebb = (1 - myeta) * hebb + myeta * deltahebb
        elif self.cliptype == 'clip':
            # Just a hard clip
            hebb = torch.clamp(hebb + myeta * deltahebb, min=-self.clipval, max=self.clipval)
        elif self.cliptype == 'aditya':
            # For this one, the clipping only occurs a posteriori (see above); hebb itself can grow arbitrarily
            hebb = hebb + myeta * deltahebb
        else:
            raise ValueError("Must choose clip type")
        # Note that "hactiv" (i.e. the new h-state) is duplicated in the return
        # values. This is to maintain the signature used by main.py/model.py
        # and is not necessary for other applications.
        hidden = (hactiv, cell, hebb)
        activout = hactiv
        return activout, hidden


# This is a slightly faster implementation of Plastic LSTMs: cut time by ~30% by
# grouping all matrix multiplications into two. Not fully debugged, use at own risk.
class MyFastPlasticLSTM(nn.Module):
    """Faster plastic LSTM: the four gate matrices are fused into two big Linear
    layers (h2f_i_opt_c and x2f_i_opt_c), with the fixed component w of the
    plastic connection folded into h2f_i_opt_c. All six configuration keys
    (cliptype, clipval, modultype, modulout, alphatype, hebboutput) are required
    in 'params'.
    """

    def __init__(self, isize, hsize, params):
        super(MyFastPlasticLSTM, self).__init__()
        self.softmax = torch.nn.functional.softmax
        self.activ = torch.tanh
        ok = 0
        if 'cliptype' in params:
            self.cliptype = params['cliptype']
            ok += 1
        if 'modultype' in params:
            self.modultype = params['modultype']
            ok += 1
        if 'hebboutput' in params:
            self.hebboutput = params['hebboutput']
            ok += 1
        if 'modulout' in params:
            self.modulout = params['modulout']
            ok += 1
        if 'clipval' in params:
            self.clipval = params['clipval']
            ok += 1
        if 'alphatype' in params:
            self.alphatype = params['alphatype']
            ok += 1
        if ok < 6:
            # BUG FIX: the message used to name the wrong class (PlasticLSTM).
            raise ValueError('When constructing MyFastPlasticLSTM, must pass "params" dictionary including cliptype, clipval, modultype, modulout, alphatype and hebboutput')
        # We group all weight matrices into two, just like the C implementation of LSTMs in PyTorch does. Faster!
        # Note: this creates some redundant biases (though not many)
        self.h2f_i_opt_c = torch.nn.Linear(hsize, 4 * hsize)  # Weights from h to f, i, o and c
        self.x2f_i_opt_c = torch.nn.Linear(isize, 4 * hsize)  # Weights from x to f, i, o and c
        self.isize = isize
        self.hsize = hsize
        if self.modultype != 'none':
            self.h2mod = torch.nn.Linear(hsize, 1)  # Although called 'h2mod', it may take input from h or c depending on modultype value
            if self.modulout == 'fanout':
                self.modfanout = torch.nn.Linear(1, hsize)
        if self.alphatype == 'perneuron':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(1, 1, hsize))
            #self.alpha = Variable(.0001 * torch.ones(1).cuda(), requires_grad=True) #torch.rand(1,1,hsize))
        elif self.alphatype == 'single':
            self.alpha = torch.nn.Parameter(.0001 * torch.ones(1))
        elif self.alphatype == 'full':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(hsize, hsize))
        else:
            # BUG FIX: single formatted string instead of a tuple of arguments.
            raise ValueError("Must select alpha type (current incorrect value is: %s)" % str(self.alphatype))
        if self.modultype == 'none':
            # Everyone has the same eta (Note: if a parameter is not actually
            # used, there can be problems with ASGD handling in main.py)
            self.eta = torch.nn.Parameter(.01 * torch.ones(1))

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        """One time step. 'hidden' is a tuple (h, c, hebb); returns (output, (h, c, hebb))."""
        hsize = self.hsize
        # All four gate pre-activations in two matrix multiplications:
        alloutputs = self.x2f_i_opt_c(inputs) + self.h2f_i_opt_c(hidden[0])
        # hidden[0] and hidden[1] are the h state and the c state; hidden[2] is the hebbian trace
        hebb = hidden[2]
        fgt = torch.sigmoid(alloutputs[:, :hsize])
        ipt = torch.sigmoid(alloutputs[:, hsize:2 * hsize])
        opt = torch.sigmoid(alloutputs[:, 2 * hsize:3 * hsize])
        # Fixed (w) part of the h-to-cell-input connection, plus the x part -
        # both already included in alloutputs:
        handx2coutput_w = alloutputs[:, 3 * hsize:]
        if self.cliptype == 'aditya':
            # Slightly different version: soft-bounded via clipval * tanh(hebb)
            h2coutput_hebb = hidden[0].unsqueeze(1).bmm(torch.mul(self.alpha, self.clipval * torch.tanh(hebb))).squeeze(1)
        else:
            h2coutput_hebb = hidden[0].unsqueeze(1).bmm(torch.mul(self.alpha, hebb)).squeeze(1)
        inputtoc = torch.tanh(handx2coutput_w + h2coutput_hebb)
        # Each *column* in w, hebb and alpha constitutes the inputs to a single cell.
        # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch).
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputtoc)
        hactiv = torch.mul(opt, torch.tanh(cell))
        #if self.hebboutput == 'i2c':
        deltahebb = torch.bmm(hidden[0].unsqueeze(2), inputtoc.unsqueeze(1))
        if self.modultype == 'none':
            myeta = self.eta
        elif self.modultype == 'modplasth2mod':
            myeta = torch.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1
        elif self.modultype == 'modplastc2mod':
            myeta = torch.tanh(self.h2mod(cell)).unsqueeze(2)
        else:
            raise ValueError("Must choose modulation type")
        if self.modultype != 'none' and self.modulout == 'fanout':
            # modfanout maps BatchSize x 1 x 1 -> BatchSize x 1 x NHidden; broadcasting
            # against deltahebb (BatchSize x NHidden x NHidden) gives a different value
            # for each cell but the same value for all inputs of a cell, as required by
            # the fanout concept.
            # BUG FIX: dropped the old `.squeeze().unsqueeze(1)` round-trip, which
            # was a no-op for batch size > 1 but corrupted shapes for batch size 1.
            myeta = self.modfanout(myeta)
        if self.cliptype == 'decay':
            hebb = (1 - myeta) * hebb + myeta * deltahebb
        elif self.cliptype == 'clip':
            hebb = torch.clamp(hebb + myeta * deltahebb, min=-self.clipval, max=self.clipval)
        elif self.cliptype == 'aditya':
            hebb = hebb + myeta * deltahebb
        else:
            raise ValueError("Must choose clip type")
        hidden = (hactiv, cell, hebb)
        activout = hactiv
        return activout, hidden  #, hebb, et, pw


# Standard, non-plastic LSTM, reimplemented "by hand" to check if our
# implementation is correct, and to ensure that our comparisons use the closest
# possible non-plastic equivalent to our plastic LSTMs. Gets almost identical
# results to the PyTorch internal LSTM used by the original smerity code.
class MyLSTM(nn.Module):
    """Plain hand-written LSTM cell (no plasticity), used as a baseline."""

    def __init__(self, isize, hsize):
        super(MyLSTM, self).__init__()
        self.softmax = torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = torch.tanh
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        self.h2c = torch.nn.Linear(hsize, hsize)
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        """One time step. 'hidden' is a tuple (h, c); returns (output, (h, c))."""
        fgt = torch.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = torch.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = torch.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, torch.tanh(self.x2c(inputs) + self.h2c(hidden[0])))
        hactiv = torch.mul(opt, torch.tanh(cell))
        #pdb.set_trace()
        hidden = (hactiv, cell)
        activout = hactiv  #self.h2o(hactiv)
        #pdb.set_trace()
        return activout, hidden  #, hebb, et, pw


# Faster MyLSTM - by ~30% in comparison to MyLSTM, by grouping matrices and matrix multiplications. Not fully debugged, use at own risk.
# Fused-matrix baseline LSTM (flagged "not fully debugged" by its author above).
class MyFastLSTM(nn.Module):
    def __init__(self, isize, hsize):
        super(MyFastLSTM, self).__init__()
        self.softmax= torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = F.tanh
        # We group all weight matrices into two, just like the C implementation of LSTMs in PyTorch does
        # Note: this creates some redundant biases (though not many)
        self.h2f_i_opt_c = torch.nn.Linear(hsize, 4*hsize) # Weights from h to f, i, o and c
        self.x2f_i_opt_c = torch.nn.Linear(isize, 4*hsize) # Weights from x to f, i, o and c
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden): #, hebb, et, pw): # hidden is a tuple of h and c states
        # One step: compute all four gate pre-activations with two matmuls,
        # then slice them out of the 4*hsize-wide result.
        #fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0])) #
        #ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0])) #
        #opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0])) #
        alloutputs = self.x2f_i_opt_c(inputs) + self.h2f_i_opt_c(hidden[0])
        hsize = self.hsize
        # You can gain ~ 5% in speed by grouping these three :
        fgt = F.sigmoid(alloutputs[:,:hsize])
        ipt = F.sigmoid(alloutputs[:,hsize:2*hsize])
        opt = F.sigmoid(alloutputs[:,2*hsize:3*hsize])
        inputtoc = F.tanh(alloutputs[:,3*hsize:])
        #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0])))#
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputtoc)
        hactiv = torch.mul(opt, F.tanh(cell))
        hidden = (hactiv, cell)
        activout = hactiv
        #pdb.set_trace()
        return activout, hidden #, hebb, et, pw
================================================ FILE: awd-lstm-lm/mylstm.py.orig ================================================
# NOTE(review): everything below is the archived backup (.orig) of mylstm.py,
# kept as history; it predates the configurable clipval (hard-coded 1.0 here)
# and the 'single' alphatype. It is reproduced unchanged.
# NOTE(review): line structure below is reconstructed from a line-collapsed
# dump; statement grouping of the single-line `if`s was inferred — confirm
# against the original file.
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import pdb


class PlasticLSTM(nn.Module):
    def __init__(self, isize, hsize, params):
        super(PlasticLSTM, self).__init__()
        self.softmax= torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = F.tanh
        # All five configuration keys are required; 'ok' counts how many were supplied.
        ok=0
        if 'cliptype' in params:
            self.cliptype = params['cliptype']
            ok+=1
        if 'modultype' in params:
            self.modultype = params['modultype']
            ok+=1
        if 'hebboutput' in params:
            self.hebboutput = params['hebboutput']
            ok+=1
        if 'modulout' in params:
            self.modulout= params['modulout']
            ok+=1
        if 'alphatype' in params:
            self.alphatype= params['alphatype']
            ok+=1
        if ok < 5:
            raise ValueError('When using PlasticLSTM, must specify cliptype, modultype, modulout, alphatype and hebboutput in params')
        # Plastic connection parameters:
        self.w = torch.nn.Parameter(.02 * torch.rand(hsize, hsize) - .01)
        if self.alphatype == 'perneuron':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(1,1,hsize))
            #self.alpha = Variable(.0001 * torch.ones(1).cuda(), requires_grad=True) #torch.rand(1,1,hsize))
        elif self.alphatype == 'full':
            self.alpha = torch.nn.Parameter(.0001 * torch.rand(hsize, hsize))
        else:
            raise ValueError("Must select alpha type (current incorrect value is:", str(self.alphatype), ")")
        if self.modultype == 'none':
            self.eta = torch.nn.Parameter(.01 * torch.ones(1)) # Everyone has the same eta (Note: if a parameter is not actually used, there can be problems with ASGD handling in main.py)
            #self.eta = .01
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        #self.h2c = torch.nn.Linear(hsize, hsize) # This (equivalent to Whg in the PyTorch docs, Uc in Wikipedia) is replaced by the plastic connection
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        # Is the modulation just a single scalar, or do we pass it through a 'fanout' weight matrix?
        if self.modultype != 'none':
            self.h2mod = torch.nn.Linear(hsize, 1) # Although called 'h2mod', it may take input from h or c depending on modultype value
            if self.modulout == 'fanout':
                self.modfanout = torch.nn.Linear(1, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden): #, hebb, et, pw): # hidden is a tuple of h, c and hebb
        hebb = hidden[2]
        fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0])))
        # To implement plasticity, we replace h2c / Whg / Uc with a plastic connection composed of w, alpha and hebb
        # Note that h2c / Whg / Uc is the matrix of weights that takes in the
        # previous time-step h, and whose output (after adding the current input
        # and passing through tanh) is multiplied by the input gates before being
        # added to the cell state
        if self.cliptype == 'aditya':
            # Each *column* in w, hebb and alpha constitutes the inputs to a single cell
            # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
            h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, torch.clamp(hebb, min=-1.0, max=1.0))).squeeze()
        else:
            h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze()
        #if np.random.rand() < .1:
        #    pdb.set_trace()
        inputstocell = F.tanh(self.x2c(inputs) + h2coutput)
        #inputstocell = F.tanh(self.x2c(inputs) + torch.matmul(hidden[0].unsqueeze(1), self.w.unsqueeze(0)).squeeze(1))
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputstocell) # self.h2c(hidden[0])))
        #pdb.set_trace()
        hactiv = torch.mul(opt, F.tanh(cell))
        #pdb.set_trace()
        # For the Hebbian computation, what counts as "output"?
        if self.hebboutput == 'i2c':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), inputstocell.unsqueeze(1))
        elif self.hebboutput == 'h2co':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), h2coutput.unsqueeze(1))
        elif self.hebboutput == 'cell':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), cell.unsqueeze(1))
        elif self.hebboutput == 'hidden':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), hactiv.unsqueeze(1))
        else:
            raise ValueError("Must choose Hebbian target output")
        # What is the source of the neuromodulator computation (if any)?
        if self.modultype == 'none':
            myeta = self.eta
        elif self.modultype == 'modplasth2mod':
            myeta = F.tanh(self.h2mod(hactiv)).unsqueeze(2) # Shape: BatchSize x 1 x 1
        elif self.modultype == 'modplastc2mod':
            myeta = F.tanh(self.h2mod(cell)).unsqueeze(2)
        else:
            raise ValueError("Must choose modulation type")
        #pdb.set_trace()
        if self.modultype != 'none' and self.modulout == 'fanout':
            # Each *column* in w, hebb and alpha constitutes the inputs to a single cell
            # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
            # The output of the following line has shape BatchSize x 1 x NHidden, i.e. 1 line and NHidden columns for each
            # batch element. When multiplying by hebb (BatchSize x NHidden x NHidden), broadcasting will provide a different
            # value for each cell but the same value for all inputs of a cell, as required by fanout concept.
            myeta = self.modfanout(myeta).squeeze().unsqueeze(1)
        if self.cliptype == 'decay':
            hebb = (1 - myeta) * hebb + myeta * deltahebb
        elif self.cliptype == 'clip':
            hebb = torch.clamp(hebb + myeta * deltahebb, min=-1.0, max=1.0)
        elif self.cliptype == 'aditya':
            hebb = hebb + myeta * deltahebb
        else:
            raise ValueError("Must choose clip type")
        hidden = (hactiv, cell, hebb)
        activout = hactiv #self.h2o(hactiv)
        #if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
        #    raise ValueError("Nan detected !")
        #pdb.set_trace()
        return activout, hidden #, hebb, et, pw


class MyLSTM(nn.Module):
    # Standard, non-plastic LSTM, reimplemented "by hand" to check if our
    # implementation is correct. Gets almost identical results to the PyTorch
    # internal LSTM used by the original smerity code.
    def __init__(self, isize, hsize):
        super(MyLSTM, self).__init__()
        self.softmax= torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = F.tanh
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        self.h2c = torch.nn.Linear(hsize, hsize)
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden): #, hebb, et, pw): # hidden is a tuple of h and c states
        fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0])))
        hactiv = torch.mul(opt, F.tanh(cell))
        #pdb.set_trace()
        hidden = (hactiv, cell)
        activout = hactiv #self.h2o(hactiv)
        #if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
        #    raise ValueError("Nan detected !")
        #pdb.set_trace()
        return activout, hidden #,
hebb, et, pw ================================================ FILE: awd-lstm-lm/opus.docker.old ================================================ #tmiconi_rl #latest #. #FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 FROM opus-deep-learning-py3:master-prod-2019_2_5_4_54_39 #FROM opus-deep-learning:master--2018_9_20_18_2_31 RUN mkdir /home/work COPY ./*.py /home/work/ COPY ./*.sh /home/work/ COPY ./*.md /home/work/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: awd-lstm-lm/plotresults.py ================================================ import numpy as np import glob import matplotlib.pyplot as plt import scipy from scipy import stats colorz = ['r', 'b', 'g', 'c', 'm', 'y', 'orange', 'k'] groupnames = glob.glob('./HDFS/ptb/results*seed0.txt') #groupnames = glob.glob('./HDFS/ptbprevious/results*seed0.txt') #groupnames = glob.glob('./HDFS/ptbold/results*.txt') #groupnames = glob.glob('./tmp/loss_*new*eplen_250*rngseed_0.txt') #groupnames = glob.glob('./tmp/loss_*new*.9_*rngseed_0.txt') # If you can only use 7 runs, smooth the losses within each run to obtain more reliable estimates of performance! 
def mavg(x, N=20):
    # Moving average over N points - currently disabled (returns input unchanged);
    # the cumsum-based implementation is kept below for reference.
    return x
    #cumsum = np.cumsum(np.insert(x, 0, 0))
    #return (cumsum[N:] - cumsum[:-N]) / N

plt.ion()
#plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts
plt.figure()
allmedianls = []
alllosses = []
poscol = 0  # running color index across plotted groups
minminlen = 999999  # shortest run length seen across all groups
for numgroup, groupname in enumerate(groupnames):
    if "ults__" not in groupname:
        continue
    # Strip the trailing seed suffix and glob all seeds of this group.
    g = groupname[:-6]+"*"
    print("====", groupname)
    fnames = glob.glob(g)
    fulllosses=[]
    losses=[]
    lgts=[]
    for fn in fnames:
        if "COPY" in fn:
            continue
        if False:
            # Dead code: optional per-seed exclusions, disabled via `if False`.
            #if "seed_3" in fn:
            #    continue
            #if "seed_7" in fn:
            #    continue
            if "seed_8" in fn:
                continue
            if "seed_9" in fn:
                continue
            if "seed_10" in fn:
                continue
            if "seed_11" in fn:
                continue
            if "seed_12" in fn:
                continue
            if "seed_13" in fn:
                continue
            if "seed_14" in fn:
                continue
            if "seed_15" in fn:
                continue
        z = np.loadtxt(fn)
        #z = mavg(z, 10) # For each run, we average the losses over K successive episodes
        #z = z[::10] # Decimation - speed things up!
        print(len(z))
        #if len(z) < 100:
        #    print(fn, len(z))
        #    continue
        #z = z[:90]
        lgts.append(len(z))
        fulllosses.append(z)
    # Truncate every run in the group to the shortest run so they stack into an array.
    minlen = min(lgts)
    if minlen < minminlen:
        minminlen = minlen
    print(minlen)
    #if minlen < 1000:
    #    continue
    for z in fulllosses:
        losses.append(z[:minlen])
    losses = np.array(losses)
    alllosses.append(losses)
    meanl = np.mean(losses, axis=0)
    stdl = np.std(losses, axis=0)
    cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality
    #cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f.
    medianl = np.median(losses, axis=0)
    allmedianls.append(medianl)
    q1l = np.percentile(losses, 25, axis=0)
    q3l = np.percentile(losses, 75, axis=0)
    highl = np.max(losses, axis=0)
    lowl = np.min(losses, axis=0)
    #highl = meanl+stdl
    #lowl = meanl-stdl
    xx = range(len(meanl))
    # xticks and labels
    #xt = range(0, len(meanl), 1000)
    xt = range(0, 10001, 2000)
    xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files
    #plt.plot(mavg(meanl, 100), label=g) #, color='blue')
    #plt.fill_between(xx, lowl, highl, alpha=.2)
    #plt.fill_between(xx, q1l, q3l, alpha=.1)
    #plt.plot(meanl) #, color='blue')
    ####plt.plot(mavg(medianl, 100), label=g) #, color='blue') # mavg changes the number of points !
    #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue')
    #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue')
    #plt.fill_between(xx, q1l, q3l, alpha=.2)
    #plt.plot(medianl, label=g) #, color='blue')
    AVGSIZE = 1
    xlen = len(mavg(q1l, AVGSIZE))
    #mylabel = g[g.find('type'):]
    mylabel = g
    # Reuse colors with a dashed linestyle once the palette is exhausted.
    myls = '-'
    if poscol >= len(colorz):
        myls = "--"
    plt.plot(mavg(medianl, AVGSIZE), label=mylabel, color=colorz[poscol % len(colorz)], ls=myls) # mavg changes the number of points !
    plt.fill_between( range(xlen), mavg(q1l, AVGSIZE), mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    #xlen = len(mavg(meanl, AVGSIZE))
    #plt.plot(mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points !
    #plt.fill_between( range(xlen), mavg(meanl - cil, AVGSIZE), mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    poscol += 1
    #plt.fill_between( range(xlen), mavg(lowl, 100), mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)])
    #plt.plot(mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)])
    #for curve in losses[1:]:
    #    plt.plot(mavg(curve, 1000), color=colorz[numgroup % len(colorz)])
ps = []
# Adapt for varying lengths across groups
#for n in range(0, alllosses[0].shape[1], 3):
#for n in range(0, minminlen):
#    ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue)
#ps = np.array(ps)
plt.legend(loc='best', fontsize=12)
#plt.xlabel('Loss (sum square diff. b/w final output and target)')
plt.xlabel('Number of Episodes')
plt.ylabel('Loss')
#plt.xticks(xt, xtl)
#plt.tight_layout()
================================================ FILE: awd-lstm-lm/plotresultssingle.py ================================================
# Plots every individual result file as its own curve (perplexity = exp(loss)),
# cycling through line styles every 10 curves.
import numpy as np
import matplotlib.pyplot as plt
import glob

fns = glob.glob('./HDFS/ptb/results_*.txt')
plt.figure()
numcurve = 0
for (ii, fn) in enumerate(fns):
    #if 'B_' not in fn and 'MYLSTM' not in fn:
    #    continue
    # NOTE(review): nesting of the following filters reconstructed from a
    # line-collapsed dump - confirm the 'agdiv10' check is top-level.
    if 'rngseed' in fn:
        if 'seed0' not in fn:
            continue
    if 'agdiv10' in fn:
        continue
    #if '44' not in fn:
    #    continue
    print(fn)
    #if 'perneuron' in fn:
    #    continue
    numcurve += 1
    if numcurve > 20:
        ls = ':'
    elif numcurve > 10:
        ls = '--'
    else:
        ls = '-'
    #z = np.loadtxt(fn)
    z = np.exp(np.loadtxt(fn))  # files store log-loss; plot perplexity
    plt.plot(z, label=fn,ls=ls)
plt.legend(loc='upper right')
plt.show()
================================================ FILE: awd-lstm-lm/pointer.py ================================================
import argparse
import time
import math
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import data
import model
from utils import batchify, get_batch, repackage_hidden

parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
parser.add_argument('--data', type=str, default='data/penn', help='location of the data corpus') parser.add_argument('--model', type=str, default='LSTM', help='type of recurrent net (LSTM, QRNN)') parser.add_argument('--save', type=str,default='best.pt', help='model to use the pointer over') parser.add_argument('--cuda', action='store_false', help='use CUDA') parser.add_argument('--bptt', type=int, default=5000, help='sequence length') parser.add_argument('--window', type=int, default=3785, help='pointer window length') parser.add_argument('--theta', type=float, default=0.6625523432485668, help='mix between uniform distribution and pointer softmax distribution over previous words') parser.add_argument('--lambdasm', type=float, default=0.12785920428335693, help='linear mix between only pointer (1) and only vocab (0) distribution') args = parser.parse_args() ############################################################################### # Load data ############################################################################### corpus = data.Corpus(args.data) eval_batch_size = 1 test_batch_size = 1 #train_data = batchify(corpus.train, args.batch_size) val_data = batchify(corpus.valid, test_batch_size, args) test_data = batchify(corpus.test, test_batch_size, args) ############################################################################### # Build the model ############################################################################### ntokens = len(corpus.dictionary) criterion = nn.CrossEntropyLoss() def one_hot(idx, size, cuda=True): a = np.zeros((1, size), np.float32) a[0][idx] = 1 v = Variable(torch.from_numpy(a)) if cuda: v = v.cuda() return v def evaluate(data_source, batch_size=10, window=args.window): # Turn on evaluation mode which disables dropout. 
if args.model == 'QRNN': model.reset() model.eval() total_loss = 0 ntokens = len(corpus.dictionary) hidden = model.init_hidden(batch_size) next_word_history = None pointer_history = None for i in range(0, data_source.size(0) - 1, args.bptt): if i > 0: print(i, len(data_source), math.exp(total_loss / i)) data, targets = get_batch(data_source, i, evaluation=True, args=args) output, hidden, rnn_outs, _ = model(data, hidden, return_h=True) rnn_out = rnn_outs[-1].squeeze() output_flat = output.view(-1, ntokens) ### # Fill pointer history start_idx = len(next_word_history) if next_word_history is not None else 0 next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])]) #print(next_word_history) pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0) #print(pointer_history) ### # Built-in cross entropy # total_loss += len(data) * criterion(output_flat, targets).data[0] ### # Manual cross entropy # softmax_output_flat = torch.nn.functional.softmax(output_flat) # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1)) # entropy = -torch.log(soft) # total_loss += len(data) * entropy.mean().data[0] ### # Pointer manual cross entropy loss = 0 softmax_output_flat = torch.nn.functional.softmax(output_flat) for idx, vocab_loss in enumerate(softmax_output_flat): p = vocab_loss if start_idx + idx > window: valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx] valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx] logits = torch.mv(valid_pointer_history, rnn_out[idx]) theta = args.theta ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1) ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze() lambdah = args.lambdasm p = lambdah * ptr_dist + (1 - lambdah) 
* vocab_loss ### target_loss = p[targets[idx].data] loss += (-torch.log(target_loss)).data[0] total_loss += loss / batch_size ### hidden = repackage_hidden(hidden) next_word_history = next_word_history[-window:] pointer_history = pointer_history[-window:] return total_loss / len(data_source) # Load the best saved model. with open(args.save, 'rb') as f: if not args.cuda: model = torch.load(f, map_location=lambda storage, loc: storage) else: model = torch.load(f) print(model) # Run on val data. val_loss = evaluate(val_data, test_batch_size) print('=' * 89) print('| End of pointer | val loss {:5.2f} | val ppl {:8.2f}'.format( val_loss, math.exp(val_loss))) print('=' * 89) # Run on test data. test_loss = evaluate(test_data, test_batch_size) print('=' * 89) print('| End of pointer | test loss {:5.2f} | test ppl {:8.2f}'.format( test_loss, math.exp(test_loss))) print('=' * 89) ================================================ FILE: awd-lstm-lm/request_devbox.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_11_27_11_33_25", "cpus":2.0, "ramMB":26000, "gpus":1, "diskMB":8000, "cluster":"opusprodda3e", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":1, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p40_24gb"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: awd-lstm-lm/request_full.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_1_22_14_38_35", "name":"PLASTICLSTM_bs6_clip2_cliptype_clip_alphatype_full_modultype_modplasth2mod_modulout_fanout_asgdtime_85_1067n_5r", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 bash ./OpusPrepare.sh \u0026\u0026 source /.bashrc \u0026\u0026 pyenv local 3.5.2 \u0026\u0026 python main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 300 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype modplasth2mod --modulout fanout --nhid 1067 --alphatype full --asgdtime 85 --clipval 2.0 --cliptype clip --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: awd-lstm-lm/request_opus.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_3_13_17_37_3", "name":"newcode_SqUsq_clp2_PLASTICLSTM_agdiv1150_opus_alphatype_full_modultype_modplasth2mod_modulout_fanout_asgdtime_125_1068n_5run", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 apt-get install unzip \u0026\u0026 sh ./getdata.sh \u0026\u0026 python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model 
PLASTICLSTM --modultype modplasth2mod --modulout fanout --nhid 1068 --alphatype full --asgdtime 125 --agdiv 1150 --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: awd-lstm-lm/request_opus.json.old ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_12_11_15_39_4", "name":"PLSTM_plastin_bs3_clip2_opus_alphatype_perneuron_modultype_modplasth2mod_modulout_fanout_asgdtime_65_1149n_5run", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 apt-get install unzip \u0026\u0026 sh ./getdata.sh \u0026\u0026 python3 main.py --batch_size 3 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 300 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype modplasth2mod --modulout fanout --nhid 1149 --alphatype perneuron --asgdtime 65 --clipval 2.0 --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } 
================================================ FILE: awd-lstm-lm/request_plast.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_12_11_15_39_4", "name":"PLSTM_plastin_bs3_clip2_opus_alphatype_perneuron_modultype_nomodul_modulout_single_asgdtime_44_1149n_5run", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 apt-get install unzip \u0026\u0026 sh ./getdata.sh \u0026\u0026 python3 main.py --batch_size 3 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 300 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype none --modulout single --nhid 1149 --alphatype perneuron --asgdtime 44 --clipval 2.0 --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: awd-lstm-lm/splitcross.py ================================================ from collections import defaultdict import torch import torch.nn as nn import numpy as np class SplitCrossEntropyLoss(nn.Module): r'''SplitCrossEntropyLoss calculates an approximate softmax''' def __init__(self, hidden_size, splits, verbose=False): # We assume splits is [0, split1, split2, N] where N >= |V| # For example, a vocab of 1000 words may have splits [0] + [100, 500] + [inf] super(SplitCrossEntropyLoss, self).__init__() 
# (interior of SplitCrossEntropyLoss from splitcross.py; __init__ starts on the previous source line)
        # self.splits is padded with 0 at the front and a very large sentinel at
        # the back, so self.splits[idx], self.splits[idx + 1] always brackets a
        # vocabulary range for split idx.
        self.hidden_size = hidden_size
        self.splits = [0] + splits + [100 * 1000000]
        self.nsplits = len(self.splits) - 1
        # Per-split op-count statistics, only filled when verbose.
        self.stats = defaultdict(list)
        self.verbose = verbose
        # Each of the splits that aren't in the head require a pretend token, we'll call them tombstones
        # The probability given to this tombstone is the probability of selecting an item from the represented split
        if self.nsplits > 1:
            self.tail_vectors = nn.Parameter(torch.zeros(self.nsplits - 1, hidden_size))
            self.tail_bias = nn.Parameter(torch.zeros(self.nsplits - 1))

    def logprob(self, weight, bias, hiddens, splits=None, softmaxed_head_res=None, verbose=False):
        """Return log-probabilities over the vocabulary for each row of `hiddens`.

        `weight`/`bias` are the decoder (output embedding) parameters. `splits`
        optionally restricts computation to the listed split indices, and a
        precomputed `softmaxed_head_res` (log-softmax over head words plus
        tombstones) can be supplied to skip recomputing the head softmax.
        """
        # First we perform the first softmax on the head vocabulary and the tombstones
        if softmaxed_head_res is None:
            start, end = self.splits[0], self.splits[1]
            head_weight = None if end - start == 0 else weight[start:end]
            head_bias = None if end - start == 0 else bias[start:end]
            # We only add the tombstones if we have more than one split
            if self.nsplits > 1:
                head_weight = self.tail_vectors if head_weight is None else torch.cat([head_weight, self.tail_vectors])
                head_bias = self.tail_bias if head_bias is None else torch.cat([head_bias, self.tail_bias])
            # Perform the softmax calculation for the word vectors in the head for all splits
            # We need to guard against empty splits as torch.cat does not like random lists
            head_res = torch.nn.functional.linear(hiddens, head_weight, bias=head_bias)
            softmaxed_head_res = torch.nn.functional.log_softmax(head_res, dim=-1)
        if splits is None:
            splits = list(range(self.nsplits))
        results = []
        running_offset = 0
        for idx in splits:
            # For those targets in the head (idx == 0) we only need to return their loss
            if idx == 0:
                results.append(softmaxed_head_res[:, :-(self.nsplits - 1)])
            # If the target is in one of the splits, the probability is the p(tombstone) * p(word within tombstone)
            else:
                start, end = self.splits[idx], self.splits[idx + 1]
                tail_weight = weight[start:end]
                tail_bias = bias[start:end]
                # Calculate the softmax for the words in the tombstone
                tail_res = torch.nn.functional.linear(hiddens, tail_weight, bias=tail_bias)
                # Then we calculate p(tombstone) * p(word in tombstone)
                # Adding is equivalent to multiplication in log space
                # NOTE(review): tombstone scores occupy the trailing nsplits-1
                # columns of the head output; [:, -idx] picks one of them (the
                # same -idx convention is used in forward, so the two agree),
                # though the ordering looks reversed w.r.t. tail_vectors - confirm.
                head_entropy = (softmaxed_head_res[:, -idx]).contiguous()
                tail_entropy = torch.nn.functional.log_softmax(tail_res, dim=-1)
                results.append(head_entropy.view(-1, 1) + tail_entropy)
        if len(results) > 1: return torch.cat(results, dim=1)
        return results[0]

    def split_on_targets(self, hiddens, targets):
        """Partition `targets` (and matching rows of `hiddens`) by vocabulary split."""
        # Split the targets into those in the head and in the tail
        split_targets = []
        split_hiddens = []
        # Determine to which split each element belongs (for each start split value, add 1 if equal or greater)
        # This method appears slower at least for WT-103 values for approx softmax
        #masks = [(targets >= self.splits[idx]).view(1, -1) for idx in range(1, self.nsplits)]
        #mask = torch.sum(torch.cat(masks, dim=0), dim=0)
        ###
        # This is equally fast for smaller splits as method below but scales linearly
        # mask[i] ends up holding the split index of targets[i]: one is added
        # for every split boundary that targets[i] is at or beyond.
        mask = None
        for idx in range(1, self.nsplits):
            partial_mask = targets >= self.splits[idx]
            mask = mask + partial_mask if mask is not None else partial_mask
        ###
        #masks = torch.stack([targets] * (self.nsplits - 1))
        #mask = torch.sum(masks >= self.split_starts, dim=0)
        for idx in range(self.nsplits):
            # If there are no splits, avoid costly masked select
            if self.nsplits == 1:
                split_targets, split_hiddens = [targets], [hiddens]
                continue
            # If all the words are covered by earlier targets, we have empties so later stages don't freak out
            if sum(len(t) for t in split_targets) == len(targets):
                split_targets.append([])
                split_hiddens.append([])
                continue
            # Are you in our split?
tmp_mask = mask == idx split_targets.append(torch.masked_select(targets, tmp_mask)) split_hiddens.append(hiddens.masked_select(tmp_mask.unsqueeze(1).expand_as(hiddens)).view(-1, hiddens.size(1))) return split_targets, split_hiddens def forward(self, weight, bias, hiddens, targets, verbose=False): if self.verbose or verbose: for idx in sorted(self.stats): print('{}: {}'.format(idx, int(np.mean(self.stats[idx]))), end=', ') print() total_loss = None if len(hiddens.size()) > 2: hiddens = hiddens.view(-1, hiddens.size(2)) split_targets, split_hiddens = self.split_on_targets(hiddens, targets) # First we perform the first softmax on the head vocabulary and the tombstones start, end = self.splits[0], self.splits[1] head_weight = None if end - start == 0 else weight[start:end] head_bias = None if end - start == 0 else bias[start:end] # We only add the tombstones if we have more than one split if self.nsplits > 1: head_weight = self.tail_vectors if head_weight is None else torch.cat([head_weight, self.tail_vectors]) head_bias = self.tail_bias if head_bias is None else torch.cat([head_bias, self.tail_bias]) # Perform the softmax calculation for the word vectors in the head for all splits # We need to guard against empty splits as torch.cat does not like random lists combo = torch.cat([split_hiddens[i] for i in range(self.nsplits) if len(split_hiddens[i])]) ### all_head_res = torch.nn.functional.linear(combo, head_weight, bias=head_bias) softmaxed_all_head_res = torch.nn.functional.log_softmax(all_head_res, dim=-1) if self.verbose or verbose: self.stats[0].append(combo.size()[0] * head_weight.size()[0]) running_offset = 0 for idx in range(self.nsplits): # If there are no targets for this split, continue if len(split_targets[idx]) == 0: continue # For those targets in the head (idx == 0) we only need to return their loss if idx == 0: softmaxed_head_res = softmaxed_all_head_res[running_offset:running_offset + len(split_hiddens[idx])] entropy = -torch.gather(softmaxed_head_res, 
dim=1, index=split_targets[idx].view(-1, 1)) # If the target is in one of the splits, the probability is the p(tombstone) * p(word within tombstone) else: softmaxed_head_res = softmaxed_all_head_res[running_offset:running_offset + len(split_hiddens[idx])] if self.verbose or verbose: start, end = self.splits[idx], self.splits[idx + 1] tail_weight = weight[start:end] self.stats[idx].append(split_hiddens[idx].size()[0] * tail_weight.size()[0]) # Calculate the softmax for the words in the tombstone tail_res = self.logprob(weight, bias, split_hiddens[idx], splits=[idx], softmaxed_head_res=softmaxed_head_res) # Then we calculate p(tombstone) * p(word in tombstone) # Adding is equivalent to multiplication in log space head_entropy = softmaxed_head_res[:, -idx] # All indices are shifted - if the first split handles [0,...,499] then the 500th in the second split will be 0 indexed indices = (split_targets[idx] - self.splits[idx]).view(-1, 1) # Warning: if you don't squeeze, you get an N x 1 return, which acts oddly with broadcasting tail_entropy = torch.gather(torch.nn.functional.log_softmax(tail_res, dim=-1), dim=1, index=indices).squeeze() entropy = -(head_entropy + tail_entropy) ### running_offset += len(split_hiddens[idx]) total_loss = entropy.float().sum() if total_loss is None else total_loss + entropy.float().sum() return (total_loss / len(targets)).type_as(weight) if __name__ == '__main__': np.random.seed(42) torch.manual_seed(42) if torch.cuda.is_available(): torch.cuda.manual_seed(42) V = 8 H = 10 N = 100 E = 10 embed = torch.nn.Embedding(V, H) crit = SplitCrossEntropyLoss(hidden_size=H, splits=[V // 2]) bias = torch.nn.Parameter(torch.ones(V)) optimizer = torch.optim.SGD(list(embed.parameters()) + list(crit.parameters()), lr=1) for _ in range(E): prev = torch.autograd.Variable((torch.rand(N, 1) * 0.999 * V).int().long()) x = torch.autograd.Variable((torch.rand(N, 1) * 0.999 * V).int().long()) y = embed(prev).squeeze() c = crit(embed.weight, bias, y, x.view(N)) 
print('Crit', c.exp().data[0]) logprobs = crit.logprob(embed.weight, bias, y[:2]).exp() print(logprobs) print(logprobs.sum(dim=1)) optimizer.zero_grad() c.backward() optimizer.step() ================================================ FILE: awd-lstm-lm/test.py ================================================ import OpusHdfsCopy from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs import argparse import time import math import numpy as np import torch import torch.nn as nn from torch.autograd import Variable import pdb import data import model from utils import batchify, get_batch, repackage_hidden torch.nn.Module.dump_patches=True parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model Testing of Saved Models') parser.add_argument('--file', type=str, default='', help='name of the file containing the saved model to be tested') parser.add_argument('--data', type=str, default='data/penn/', help='location of the data corpus') parser.add_argument('--model', type=str, default='LSTM', help='type of recurrent net (LSTM, QRNN, GRU)') parser.add_argument('--alphatype', type=str, default='full', help="type of alpha matrix: (full, fanout)") parser.add_argument('--modultype', type=str, default='none', help="type of modulation: (none, modplasth2mod, modplastc2mod)") parser.add_argument('--modulout', type=str, default='single', help="modulatory output (single or fanout)") parser.add_argument('--cliptype', type=str, default='clip', help="clip type (decay, clip, aditya)") parser.add_argument('--hebboutput', type=str, default='i2c', help='output used for hebbian computations (i2c, h2co, cell, hidden)') parser.add_argument('--emsize', type=int, default=400, help='size of word embeddings') parser.add_argument('--nhid', type=int, default=1150, help='number of hidden units per layer') parser.add_argument('--nlayers', type=int, default=3, help='number of layers') parser.add_argument('--lr', type=float, default=30, help='initial learning rate') 
# (continuation of the argparse setup in test.py; earlier add_argument calls are on the previous source line)
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
parser.add_argument('--numgpu', type=int, default=0, help='which GPU to use? (no effect if GPU not used at all)')
parser.add_argument('--epochs', type=int, default=8000, help='upper epoch limit')
parser.add_argument('--batch_size', type=int, default=80, metavar='N', help='batch size')
parser.add_argument('--bptt', type=int, default=70, help='sequence length')
# Dropout family: layer output, hidden-to-hidden, input embedding, whole-word embedding, and weight drop.
parser.add_argument('--dropout', type=float, default=0.4, help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropouth', type=float, default=0.3, help='dropout for rnn layers (0 = no dropout)')
parser.add_argument('--dropouti', type=float, default=0.65, help='dropout for input embedding layers (0 = no dropout)')
parser.add_argument('--dropoute', type=float, default=0.1, help='dropout to remove words from embedding layer (0 = no dropout)')
parser.add_argument('--wdrop', type=float, default=0.5, help='amount of weight dropout to apply to the RNN hidden to hidden matrix')
parser.add_argument('--seed', type=int, default=1111, help='random seed')
# NOTE(review): the help string below ('random seed') looks copy-pasted from --seed;
# --nonmono presumably controls the non-monotonic-trigger window for ASGD - confirm against main.py.
parser.add_argument('--nonmono', type=int, default=5, help='random seed')
# NOTE: action='store_false' means CUDA is ON by default and passing --cuda turns it OFF.
parser.add_argument('--cuda', action='store_false', help='use CUDA')
parser.add_argument('--log-interval', type=int, default=200, metavar='N', help='report interval')
# Default save path: digits of the current timestamp, effectively unique per invocation.
randomhash = ''.join(str(time.time()).split('.'))
parser.add_argument('--save', type=str, default=randomhash+'.pt', help='path to save the final model')
parser.add_argument('--alpha', type=float, default=2, help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
parser.add_argument('--beta', type=float, default=1, help='beta slowness regularization applied on RNN activiation (beta = 0 means no regularization)')
parser.add_argument('--wdecay', type=float, default=1.2e-6, help='weight decay applied to all weights')
parser.add_argument('--resume', type=str, default='', help='path of model to resume')
parser.add_argument('--optimizer', type=str, default='sgd', help='optimizer to use (sgd, adam)') parser.add_argument('--when', nargs="+", type=int, default=[-1], help='When (which epochs) to divide the learning rate by 10 - accepts multiple') args = parser.parse_args() args.tied = True # Set the random seed manually for reproducibility. np.random.seed(args.seed) torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") else: torch.cuda.manual_seed(args.seed) ############################################################################### # Load data ############################################################################### def model_save(fn): with open(fn, 'wb') as f: torch.save([model, criterion, optimizer], f) def model_load(fn): global model, criterion, optimizer with open(fn, 'rb') as f: model, criterion, optimizer = torch.load(f, map_location=torch.device(args.numgpu)) import platform print("Torch version:", torch.__version__, "Numpy version:", np.version.version, "Python version:", platform.python_version()) import os import hashlib fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest()) if os.path.exists(fn): print('Loading cached dataset...') corpus = torch.load(fn) else: print('Producing dataset...') corpus = data.Corpus(args.data) torch.save(corpus, fn) eval_batch_size = 10 test_batch_size = 1 train_data = batchify(corpus.train, args.batch_size, args) val_data = batchify(corpus.valid, eval_batch_size, args) test_data = batchify(corpus.test, test_batch_size, args) #train_data = train_data[:5000,:] # For debugging ############################################################################### # Build the model ############################################################################### from splitcross import SplitCrossEntropyLoss criterion = None ntokens = len(corpus.dictionary) myparams={} myparams['cliptype'] = args.cliptype 
myparams['modultype'] = args.modultype myparams['modulout'] = args.modulout myparams['hebboutput'] = args.hebboutput myparams['alphatype'] = args.alphatype suffix = args.model+'_'+myparams['cliptype']+'_'+myparams['modultype']+'_'+myparams['modulout']+'_'+myparams['hebboutput']+'_'+myparams['alphatype']+'_lr'+str(args.lr)+'_'+str(args.nlayers)+'l_'+str(args.nhid)+'h' RESULTSFILENAME = 'results_'+suffix+'.txt' MODELFILENAME = args.file ### if not criterion: splits = [] if ntokens > 500000: # One Billion # This produces fairly even matrix mults for the buckets: # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422 splits = [4200, 35000, 180000] elif ntokens > 75000: # WikiText-103 splits = [2800, 20000, 76000] print('Using', splits) criterion = SplitCrossEntropyLoss(args.emsize, splits=splits, verbose=False) ### #params = list(model.parameters()) + list(criterion.parameters()) #if args.cuda: # model = model.cuda() # criterion = criterion.cuda() # params = list(model.parameters()) + list(criterion.parameters()) #### #total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0] for x in params if x.size()) #print('Args:', args) #print('Model total parameters:', total_params) ############################################################################### # Training code ############################################################################### def evaluate(data_source, batch_size=10): # Turn on evaluation mode which disables dropout. 
# (body of test.py's evaluate(); its def line is on the previous source line)
    model.eval()
    with torch.no_grad():
        if args.model == 'QRNN': model.reset()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        # Walk the evaluation corpus in bptt-sized chunks, accumulating the
        # length-weighted criterion so the final division yields an average
        # per-position loss over the whole data source.
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
            # repackage_hidden presumably detaches the hidden state so one
            # chunk's graph is not kept alive into the next - see utils.py.
            hidden = repackage_hidden(hidden)
        #return total_loss[0] / len(data_source)
        return total_loss / len(data_source)

# Loop over epochs.
lr = args.lr
best_val_loss = []
stored_loss = 100000000

print("MyParams:", myparams)
print("Args:", args)

# Load the best saved model.
model_load(MODELFILENAME)

NUMGPU = args.numgpu
params = list(model.parameters()) + list(criterion.parameters())
if args.cuda:
    model = model.cuda(device=NUMGPU)
    criterion = criterion.cuda(device=NUMGPU)
    # Re-collect parameters after the modules have been moved to the GPU.
    params = list(model.parameters()) + list(criterion.parameters())
###
total_params = sum(x.numel() for x in params)# if x.numel())
print('Args:', args)
print('Model total parameters:', total_params)

#pdb.set_trace()

# Run on test data.
# (tail of awd-lstm-lm/main.py: final evaluation on the test set)
test_loss = evaluate(test_data, test_batch_size)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'.format(
    test_loss, math.exp(test_loss), test_loss / math.log(2)))
print('=' * 89)


================================================ FILE: awd-lstm-lm/tmp.py ================================================
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import pdb


class PlasticLSTM(nn.Module):
    """LSTM cell where the recurrent h->candidate-cell connection (h2c / Whg /
    Uc) is replaced by a differentiable-plastic connection: a fixed weight
    matrix `w` plus a Hebbian trace `hebb` scaled by plasticity coefficients
    `alpha` and a (possibly neuromodulated) learning rate eta.

    Required keys in `params`: cliptype, modultype, modulout, alphatype,
    hebboutput — each selects one variant of the plasticity rule (see forward).
    """

    def __init__(self, isize, hsize, params):
        super(PlasticLSTM, self).__init__()
        self.softmax = torch.nn.functional.softmax
        #if params['activ'] == 'tanh': self.activ = F.tanh
        # Count how many of the five required options were supplied.
        ok = 0
        if 'cliptype' in params:
            self.cliptype = params['cliptype']
            ok += 1
        if 'modultype' in params:
            self.modultype = params['modultype']
            ok += 1
        if 'hebboutput' in params:
            self.hebboutput = params['hebboutput']
            ok += 1
        if 'modulout' in params:
            self.modulout = params['modulout']
            ok += 1
        if 'alphatype' in params:
            self.alphatype = params['alphatype']
            ok += 1
        if ok < 5:
            raise ValueError('When using PlasticLSTM, must specify cliptype, modultype, modulout, alphatype and hebboutput in params')
        # Plastic connection parameters:
        self.w = torch.nn.Parameter(.02 * torch.rand(hsize, hsize) - .01)
        if self.alphatype == 'fanout':
            # One shared plasticity coefficient (per-cell fanout variant commented out).
            self.alpha = torch.nn.Parameter(.001 * torch.ones(1))  #torch.rand(1,1,hsize))
        else:
            self.alpha = torch.nn.Parameter(.00001 * torch.rand(hsize, hsize))
        if self.modultype == 'none':
            self.eta = torch.nn.Parameter(.01 * torch.ones(1))  # Everyone has the same eta (Note: if a parameter is not actually used, there can be problems with ASGD handling in main.py)
        #self.eta = .01
        # Standard LSTM gate projections (forget / input / output) from h and x.
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        #self.h2c = torch.nn.Linear(hsize, hsize) # This (equivalent to Whg in the PyTorch docs, Uc in Wikipedia) is replaced by the plastic connection
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        if self.modultype != 'none':
            self.h2mod = torch.nn.Linear(hsize, 1)  # Although called 'h2mod', it may take input from h or c depending on modultype value
            if self.modulout == 'fanout':
                self.modfanout = torch.nn.Linear(1, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        # hidden is a tuple of h, c and hebb
        hebb = hidden[2]
        fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0])))
        # To implement plasticity, we replace h2c / Whg / Uc with a plastic connection composed of w, alpha and hebb
        # Note that h2c / Whg / Uc is the matrix of weights that takes in the
        # previous time-step h, and whose output (after adding the current input
        # and passing through tanh) is multiplied by the input gates before being
        # added to the cell state
        if self.cliptype == 'aditya':
            # Each *column* in w, hebb and alpha constitutes the inputs to a single cell
            # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
            h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, torch.clamp(hebb, min=-1.0, max=1.0))).squeeze()
        else:
            h2coutput = hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze()
        #if np.random.rand() < .1:
        #    pdb.set_trace()
        inputstocell = F.tanh(self.x2c(inputs) + h2coutput)
        #inputstocell = F.tanh(self.x2c(inputs) + torch.matmul(hidden[0].unsqueeze(1), self.w.unsqueeze(0)).squeeze(1))
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputstocell)  # self.h2c(hidden[0])))
        #pdb.set_trace()
        hactiv = torch.mul(opt, F.tanh(cell))
        #pdb.set_trace()
        # Hebbian update: outer product of previous h with one of several
        # possible "target" activations, selected by hebboutput.
        if self.hebboutput == 'i2c':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), inputstocell.unsqueeze(1))
        elif self.hebboutput == 'h2co':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), h2coutput.unsqueeze(1))
        elif self.hebboutput == 'cell':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), cell.unsqueeze(1))
        elif self.hebboutput == 'hidden':
            deltahebb = torch.bmm(hidden[0].unsqueeze(2), hactiv.unsqueeze(1))
        else:
            raise ValueError("Must choose Hebbian target output")
        # Plasticity learning rate: fixed parameter, or neuromodulated from h or c.
        if self.modultype == 'none':
            myeta = self.eta
        elif self.modultype == 'modplasth2mod':
            myeta = F.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1
        elif self.modultype == 'modplastc2mod':
            myeta = F.tanh(self.h2mod(cell)).unsqueeze(2)
        else:
            raise ValueError("Must choose modulation type")
        #pdb.set_trace()
        if self.modultype != 'none' and self.modulout == 'fanout':
            # Each *column* in w, hebb and alpha constitutes the inputs to a single cell
            # For w and alpha, columns are 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
            # The output of the following line has shape BatchSize x 1 x NHidden, i.e. 1 line and NHidden columns for each
            # batch element. When multiplying by hebb (BatchSize x NHidden x NHidden), broadcasting will provide a different
            # value for each cell but the same value for all inputs of a cell, as required by fanout concept.
            myeta = self.modfanout(myeta).squeeze().unsqueeze(1)
        # How the Hebbian trace is kept bounded: decaying average, hard clip, or
        # unbounded accumulation with clipping applied at read time ('aditya').
        if self.cliptype == 'decay':
            hebb = (1 - myeta) * hebb + myeta * deltahebb
        elif self.cliptype == 'clip':
            hebb = torch.clamp(hebb + myeta * deltahebb, min=-1.0, max=1.0)
        elif self.cliptype == 'aditya':
            hebb = hebb + myeta * deltahebb
        else:
            raise ValueError("Must choose clip type")
        hidden = (hactiv, cell, hebb)
        activout = hactiv  #self.h2o(hactiv)
        #if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
        #    raise ValueError("Nan detected !")
        return activout, hidden  #, hebb, et, pw


class MyLSTM(nn.Module):
    """Plain hand-written LSTM cell (no plasticity) — the non-plastic baseline
    matching PlasticLSTM's interface, with hidden = (h, c)."""

    def __init__(self, isize, hsize):
        super(MyLSTM, self).__init__()
        self.softmax = torch.nn.functional.softmax
        #if params['activ'] == 'tanh': self.activ = F.tanh
        self.h2f = torch.nn.Linear(hsize, hsize)
        self.h2i = torch.nn.Linear(hsize, hsize)
        self.h2opt = torch.nn.Linear(hsize, hsize)
        self.h2c = torch.nn.Linear(hsize, hsize)
        self.x2f = torch.nn.Linear(isize, hsize)
        self.x2opt = torch.nn.Linear(isize, hsize)
        self.x2i = torch.nn.Linear(isize, hsize)
        self.x2c = torch.nn.Linear(isize, hsize)
        self.isize = isize
        self.hsize = hsize

    def forward(self, inputs, hidden):  #, hebb, et, pw):
        # hidden is a tuple of h and c states
        fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0]))
        ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0]))
        opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0]))
        cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0])))
        hactiv = torch.mul(opt, F.tanh(cell))
        #pdb.set_trace()
        hidden = (hactiv, cell)
        activout = hactiv  #self.h2o(hactiv)
        #if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
        #    raise ValueError("Nan detected !")
        #pdb.set_trace()
        return activout, hidden  #, hebb, et, pw


================================================ FILE: awd-lstm-lm/utils.py ================================================
import torch
#from torch.autograd import Variable


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    #if type(h) == Variable:
    #return Variable(h.data)
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        # Recurse into (possibly nested) tuples of hidden states.
        return tuple(repackage_hidden(v) for v in h)


def batchify(data, bsz, args):
    """Reshape a 1-D token tensor into bsz columns (seq_len x bsz), trimming
    the remainder; moves it to GPU args.numgpu when args.cuda is set."""
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda(device=args.numgpu)
    return data


def get_batch(source, i, args, seq_len=None, evaluation=False):
    """Return (data, target) slices starting at row i; target is data shifted
    by one step and flattened. seq_len defaults to args.bptt, capped at the
    end of `source`."""
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target


================================================ FILE: awd-lstm-lm/weight_drop.py ================================================
import torch
from torch.nn import Parameter
from functools import wraps


class WeightDrop(torch.nn.Module):
    """Wraps a module and applies dropout to the named weight matrices on
    every forward pass (DropConnect / variational weight dropout), by
    re-registering each weight as '<name>_raw' and recomputing the dropped
    version before delegating to the wrapped module."""

    def __init__(self, module, weights, dropout=0, variational=False):
        super(WeightDrop, self).__init__()
        self.module = module
        self.weights = weights
        self.dropout = dropout
        self.variational = variational
        self._setup()

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # We need to replace flatten_parameters with a nothing function
        # It must be a function rather than a lambda as otherwise pickling explodes
        # We can't write boring code though, so ... WIDGET DEMAGNETIZER Y2K EDITION!
        # (╯°□°)╯︵ ┻━┻
        return

    def _setup(self):
        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        if issubclass(type(self.module), torch.nn.RNNBase):
            self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition
        for name_w in self.weights:
            print('Applying weight drop of {} to {}'.format(self.dropout, name_w))
            w = getattr(self.module, name_w)
            # Move the real parameter aside under '<name>_raw'; the dropped
            # copy is re-attached under the original name each forward.
            del self.module._parameters[name_w]
            self.module.register_parameter(name_w + '_raw', Parameter(w.data))

    def _setweights(self):
        for name_w in self.weights:
            raw_w = getattr(self.module, name_w + '_raw')
            w = None
            if self.variational:
                # Variational: drop whole rows (same mask reused across columns).
                mask = torch.autograd.Variable(torch.ones(raw_w.size(0), 1))
                if raw_w.is_cuda:
                    mask = mask.cuda()
                mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True)
                w = mask.expand_as(raw_w) * raw_w
            else:
                w = torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)
            setattr(self.module, name_w, w)

    def forward(self, *args):
        self._setweights()
        return self.module.forward(*args)


if __name__ == '__main__':
    import torch
    from weight_drop import WeightDrop

    # Input is (seq, batch, input)
    x = torch.autograd.Variable(torch.randn(2, 1, 10)).cuda()
    h0 = None

    ###
    print('Testing WeightDrop')
    print('=-=-=-=-=-=-=-=-=-=')

    ###
    print('Testing WeightDrop with Linear')
    lin = WeightDrop(torch.nn.Linear(10, 10), ['weight'], dropout=0.9)
    lin.cuda()
    run1 = [x.sum() for x in lin(x).data]
    run2 = [x.sum() for x in lin(x).data]
    print('All items should be different')
    print('Run 1:', run1)
    print('Run 2:', run2)
    assert run1[0] != run2[0]
    assert run1[1] != run2[1]
    print('---')

    ###
    print('Testing WeightDrop with LSTM')
    wdrnn = WeightDrop(torch.nn.LSTM(10, 10), ['weight_hh_l0'], dropout=0.9)
    wdrnn.cuda()
    run1 = [x.sum() for x in wdrnn(x, h0)[0].data]
    run2 = [x.sum() for x in wdrnn(x, h0)[0].data]
    print('First timesteps should be equal, all others should differ')
    print('Run 1:', run1)
    print('Run 2:', run2)
    # First time step, not influenced by hidden to hidden weights, should be
equal assert run1[0] == run2[0] # Second step should not assert run1[1] != run2[1] print('---') ================================================ FILE: images/OpusHdfsCopy.py ================================================ # Uber-only code for interacting with hdfs # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import os.path def checkHdfs(): return os.path.isfile('/opt/hadoop/latest/bin/hdfs') def transferFileToHdfsPath(sourcepath, targetpath): hdfspath = targetpath targetdir = os.path.dirname(targetpath) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) def transferFileToHdfsDir(sourcepath, targetdir): hdfspath = os.path.join(targetdir, os.path.basename(sourcepath)) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) ================================================ FILE: images/README.md ================================================ ## Images This code implements the image completion task: three images are shown several times, then one of the 
images is half-erased and presented, and the network must reconstruct the missing portion of the image.

To run this code, you must download the [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html) (Python version), and copy the `data_batch_*` files into this directory.

================================================ FILE: images/anim.py ================================================
# Make an animation from the activities of the network over time
#
# Copyright (c) 2018 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
import scipy
import scipy.misc
from torch import optim
import random
import sys
import pickle
import pdb
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import glob

np.set_printoptions(precision=3)

import images as pics
from images import Network

fig = plt.figure()
plt.axis('off')

# Note that this is a different file from the ones used in training
with open('./data_batch_5', 'rb') as fo:
    imagedict = pickle.load(fo, encoding='bytes')
imagedata = imagedict[b'data']

# The suffix selects which saved training run to animate (earlier suffixes
# from previous experiments removed for clarity).
suffix='etarefiner_eta_0.01_nbpatterns_3_interpresdelay_2_patternsize_1024_prestime_20_learningrate_1e-05_nbprescycles_3_rngseed_0_prestimetest_3_probadegrade_0.5_inputboost_1.0_nbiter_150000'

#fn = './tmp/results_'+suffix+'.dat'
fn = './results_'+suffix+'.dat'
# Pickled in this order by images.py: w, alpha, eta, losses, params.
with open(fn, 'rb') as fo:
    myw = pickle.load(fo)
    myalpha = pickle.load(fo)
    myeta = pickle.load(fo)
    myall_losses = pickle.load(fo)
    myparams = pickle.load(fo)

net = Network(myparams)
#np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])
#rngseed=18
#rngseed=4
rngseed=7
np.random.seed(rngseed); random.seed(rngseed); torch.manual_seed(rngseed)

#print myall_losses
ttype = torch.cuda.FloatTensor  # Must match the one in pics_eta.py
#ttype = torch.FloatTensor  # Must match the one in pics_eta.py

# Load the trained parameters into the freshly built network.
net.w.data = torch.from_numpy(myw).type(ttype)
net.alpha.data = torch.from_numpy(myalpha).type(ttype)
net.eta.data = torch.from_numpy(myeta).type(ttype)
print(net.w.data[:10,:10])
print(net.eta.data)

NBPICS = 1 # 10
nn=1
imagesize = int(np.sqrt(myparams['patternsize']))
outputs={}
FILLINGSTEPS = myparams['prestimetest'] + myparams['interpresdelay'] + 1

# Two ways to do it : show the full actual process, or show a "simnplified" version where you just show the three images and the pattern completion (slowed down)
SIMPLIFIED = 0
if SIMPLIFIED:
    for numpic in range(NBPICS):
        print("Pattern", numpic)
        z = np.random.rand()
        z = np.random.rand()
        inputsTensor, targetPattern = pics.generateInputsAndTarget(myparams, contiguousperturbation=True)
        y = net.initialZeroState()
        hebb = net.initialZeroHebb()
        net.zeroDiagAlpha()
        ax_imgs = []
        print("Running the episode...")
        for numstep in range(myparams['nbsteps']):
            y, hebb = net(Variable(inputsTensor[numstep], requires_grad=False), y, hebb)
            output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize))
            #output = scipy.misc.imresize(output, 4.0)
            #plt.subplot(NBPICS, FILLINGSTEPS, nn)
            #plt.axis('off')
            #plt.imshow(output, cmap='gray', vmin=-1.0, vmax=1.0)
            #if numstep == 1 or numstep == myparams['prestime'] + myparams['interpresdelay'] + 1 or \
            #numstep == 2 * (myparams['prestime'] + myparams['interpresdelay']) + 1 or \
            # Show the last set of 3 patterns, and the completion:
            if numstep == myparams['nbsteps'] - myparams['prestimetest'] - myparams['interpresdelay'] - 2 or \
                    numstep == myparams['nbsteps'] - myparams['prestimetest'] - (myparams['interpresdelay'] + myparams['prestime']) - myparams['interpresdelay'] - 2 or \
                    numstep == myparams['nbsteps'] - myparams['prestimetest'] - (myparams['interpresdelay'] + myparams['prestime']) *2 - myparams['interpresdelay'] - 2 or \
                    numstep >= myparams['nbsteps'] - myparams['prestimetest'] :
                if numstep == myparams['nbsteps'] - myparams['prestimetest'] :
                    # First test frame: show the half-blanked probe pattern.
                    output_half = output.copy()
                    output_half[16:,:] = 0  # NOTE: we are assuming that the grayed part will be the bottom one, which is only true for half the cases
                    a1 = plt.imshow(output_half, animated=True, cmap='gray', vmin=-1.0, vmax=1.0)
                else:
                    a1 = plt.imshow(output, animated=True, cmap='gray', vmin=-1.0, vmax=1.0)
                #a2 = plt.text(1, 1, str(numstep)+"/"+str(myparams['nbsteps']), fontsize=12, color='r')
                if numstep < myparams['nbsteps'] - myparams['prestimetest'] :
                    a3 = plt.text(1, 1, "Pattern "+str(nn), fontsize=12, color='r')
                else:
                    a3 = plt.text(1, 1, "Pattern completion", fontsize=12, color='r')
                ax_imgs.append([a1, a3])
                #ax_imgs.append([fullimg])
                nn += 1
                #scipy.misc.imsave('pic'+str(numpic)+'_'+str(numstep)+'.png', output)
        #plt.show(block=True)
        print("Writing out the animation file")
        anim = animation.ArtistAnimation(fig, ax_imgs, repeat_delay=2000)  # repeat_delay is ignored...
        anim.save('anim_short_'+str(numpic)+'.gif', writer='imagemagick', fps=1)
        # All images could be rotated 90deg. This allows us to display each set as a
        # vertical column by rotating the final image 90 degrees too.
        # (commented-out single-figure diagnostic plotting removed for clarity)
else:
    for numpic in range(NBPICS):
        print("Pattern", numpic)
        z = np.random.rand()
        z = np.random.rand()
        inputsTensor, targetPattern = pics.generateInputsAndTarget(myparams, contiguousperturbation=True)
        y = net.initialZeroState()
        hebb = net.initialZeroHebb()
        net.zeroDiagAlpha()
        ax_imgs = []
        print("Running the episode...")
        for numstep in range(myparams['nbsteps']):
            y, hebb = net(Variable(inputsTensor[numstep], requires_grad=False), y, hebb)
            output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize))
            #output = scipy.misc.imresize(output, 4.0)
            #plt.subplot(NBPICS, FILLINGSTEPS, nn)
            #plt.axis('off')
            #plt.imshow(output, cmap='gray', vmin=-1.0, vmax=1.0)
            # Full version: one animation frame per network time step.
            a1 = plt.imshow(output, animated=True, cmap='gray', vmin=-1.0, vmax=1.0)
            a2 = plt.text(1, 1, str(numstep)+"/"+str(myparams['nbsteps']), fontsize=12, color='r')
            if numstep < myparams['nbsteps'] - myparams['prestimetest'] - 1:
                a3 = plt.text(14, 1, "Pattern presentations", fontsize=12, color='r')
            else:
                a3 = plt.text(14, 1, "Pattern completion", fontsize=12, color='r')
            ax_imgs.append([a1, a2, a3])
            #ax_imgs.append([fullimg])
            nn += 1
            #scipy.misc.imsave('pic'+str(numpic)+'_'+str(numstep)+'.png', output)
        # Post-completion, keep the last image up a bit
        for numstep_add in range(50):
            a1 = plt.imshow(output, animated=True, cmap='gray', vmin=-1.0, vmax=1.0)
            a2 = plt.text(1, 1, str(myparams['nbsteps'])+"/"+str(myparams['nbsteps']), fontsize=12, color='r')
            a3 = plt.text(14, 1, "Pattern completion", fontsize=12, color='r')
            ax_imgs.append([a1, a2, a3])
        #plt.show(block=True)
        print("Writing out the animation file")
        anim = animation.ArtistAnimation(fig, ax_imgs, repeat_delay=2000)  # repeat_delay is ignored...
        anim.save('anim_full_'+str(numpic)+'.gif', writer='imagemagick', fps=10)
        # All images could be rotated 90deg. This allows us to display each set as a
        # vertical column by rotating the final image 90 degrees too.
        # (commented-out single-figure diagnostic plotting removed for clarity)


================================================ FILE: images/images.py ================================================
# Differentiable plasticity: natural image memorization and reconstruction.
#
# Copyright (c) 2018 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This program uses the click module rather than argparse to scan command-line arguments. I won't do that again. # You start getting acceptable results after ~3000 episodes (~15 minutes with a standard GPU). Let it run longer for better results. # To observe the results, run testpics.py (which uses the output files produced by this program) import torch import torch.nn as nn from torch.autograd import Variable import click import numpy as np from numpy import random import torch.nn.functional as F from torch import optim import random import sys import pickle import pdb import time import os import platform # Uber-only: #import OpusHdfsCopy #from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs # Loading the image data. 
This requires downloading the CIFAR 10 dataset (Python version) - https://www.cs.toronto.edu/~kriz/cifar.html imagedata=np.zeros((0, 1024*3)) for numfile in range(4): with open('./data_batch_'+str(numfile+1), 'rb') as fo: #imagedict = pickle.load(fo) # Python 2 imagedict = pickle.load(fo, encoding='bytes') # Python 3 imagedata = np.concatenate((imagedata, imagedict[b'data']), axis=0) np.set_printoptions(precision=4) defaultParams = { 'nbpatterns': 3, # number of images per episode 'nbprescycles': 3, # number of presentations for each image 'prestime': 20, # number of time steps for each image presentation 'prestimetest': 3, # number of time steps for the test (degraded) image 'interpresdelay': 2, # number of time steps (with zero input) between two presentations 'patternsize': 1024, # size of the images (32 x 32 = 1024) 'nbiter': 100000, # number of episodes 'probadegrade': .5, # when contiguousperturbation is False (which it shouldn't be), probability of zeroing each pixel in the test image 'lr': 1e-4, # Adam learning rate 'print_every': 10, # how often to print statistics and save files 'homogenous': 0, # whether alpha should be shared across connections 'rngseed':0 # random seed } #ttype = torch.FloatTensor; # For CPU ttype = torch.cuda.FloatTensor; # For GPU # Generate the full list of inputs for an episode def generateInputsAndTarget(params, contiguousperturbation=True): #print(("Input Boost:", params['inputboost'])) inputT = np.zeros((params['nbsteps'], 1, params['nbneur'])) #inputTensor, initially in numpy format... 
# Create the random patterns to be memorized in an episode # Floating-point, graded patterns, zero-mean patterns=[] for nump in range(params['nbpatterns']): numpic = np.random.randint(imagedata.shape[0]) p = imagedata[numpic].reshape((3, 1024)).sum(0).astype(float) p = p[:params['patternsize']] p = p - np.mean(p) p = p / (1e-8+np.max(np.abs(p))) #p = (np.random.randint(2, size=params['patternsize']) - .5) *2 # Binary patterns patterns.append(p) #print "patterns generated!" # Now 'patterns' contains the NBPATTERNS patterns to be memorized in this episode - in numpy format # Creating the test pattern, partially zero'ed out, that the network will have to complete testpattern = random.choice(patterns).copy() preservedbits = np.ones(params['patternsize']) if contiguousperturbation: # Contiguous perturbation = one contiguous half of the image is zeroed out. Default (see above). preservedbits[int(params['patternsize']/2):] = 0 if np.random.rand() < .5: preservedbits = 1 - preservedbits else: # Otherwise, randomly zero out individual pixels. Because natural images are highly autocorrelated, a trivial approximate solution is to take the average of nearby pixels. 
preservedbits[:int(params['probadegrade'] * params['patternsize'])] = 0; np.random.shuffle(preservedbits) degradedtestpattern = testpattern * preservedbits # Inserting the inputs in the input tensor at the proper places for nc in range(params['nbprescycles']): np.random.shuffle(patterns) for ii in range(params['nbpatterns']): for nn in range(params['prestime']): numi =nc * (params['nbpatterns'] * (params['prestime']+params['interpresdelay'])) + ii * (params['prestime']+params['interpresdelay']) + nn inputT[numi][0][:params['patternsize']] = patterns[ii][:] for nn in range(params['prestimetest']): inputT[-params['prestimetest'] + nn][0][:params['patternsize']] = degradedtestpattern[:] for nn in range(params['nbsteps']): inputT[nn][0][-1] = 1.0 # Bias neuron is forced to 1 #inputT[nn] *= params['inputboost'] # Strengthen inputs inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to Tensor target = torch.from_numpy(testpattern).type(ttype) return inputT, target class Network(nn.Module): def __init__(self, params): super(Network, self).__init__() # Notice that the vectors are row vectors, and the matrices are transposed wrt the comp neuro order, following deep learning / pytorch conventions # Each *column* of w targets a single output neuron self.w = Variable(.01 * torch.randn(params['nbneur'], params['nbneur']).type(ttype), requires_grad=True) # fixed (baseline) weights if params['homogenous'] == 1: self.alpha = Variable(.01 * torch.ones(1).type(ttype), requires_grad=True) # plasticity coefficients: homogenous/shared across connections else: self.alpha = Variable(.01 * torch.randn(params['nbneur'], params['nbneur']).type(ttype),requires_grad=True) # plasticity coefficients: independent self.eta = Variable(.01 * torch.ones(1).type(ttype), requires_grad=True) # "learning rate" of plasticity, shared across all connections self.params = params def forward(self, input, yin, hebb): # Inputs are fed by clamping the output of cells that receive input at the 
input value, like in standard Hopfield networks # clamps = torch.zeros(1, self.params['nbneur']) clamps = np.zeros(self.params['nbneur']) zz = torch.nonzero(input.data[0].cpu()).numpy().squeeze() #print(zz, zz.shape) clamps[zz] = 1 #print(clamps) clamps = Variable(torch.from_numpy(clamps).type(ttype), requires_grad=False).float() yout = F.tanh( yin.mm(self.w + torch.mul(self.alpha, hebb))) * (1 - clamps) + input * clamps hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(yin.unsqueeze(2), yout.unsqueeze(1))[0] # bmm used to implement outer product return yout, hebb def initialZeroState(self): return Variable(torch.zeros(1, self.params['nbneur']).type(ttype)) def initialZeroHebb(self): return Variable(torch.zeros(self.params['nbneur'], self.params['nbneur']).type(ttype)) def train(paramdict=None): #params = dict(click.get_current_context().params) print("Starting training...") params = {} params.update(defaultParams) if paramdict: params.update(paramdict) print("Passed params: ", params) print(platform.uname()) sys.stdout.flush() params['nbsteps'] = params['nbprescycles'] * ((params['prestime'] + params['interpresdelay']) * params['nbpatterns']) + params['prestimetest'] # Total number of steps per episode params['nbneur'] = params['patternsize'] + 1 suffix = "images_"+"".join([str(x)+"_" if pair[0] is not 'nbneur' and pair[0] is not 'nbsteps' and pair[0] is not 'print_every' and pair[0] is not 'rngseed' else '' for pair in zip(params.keys(), params.values()) for x in pair])[:-1] + '_rngseed_'+str(params['rngseed']) # Turning the parameters into a nice suffix for filenames; rngseed always appears last # Initialize random seeds (first two redundant?) 
print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") net = Network(params) total_loss = 0.0 print("Initializing optimizer") optimizer = torch.optim.Adam([net.w, net.alpha, net.eta], lr=params['lr']) all_losses = [] #print_every = 20 nowtime = time.time() print("Starting episodes...") sys.stdout.flush() for numiter in range(params['nbiter']): # print("Iter ", numiter) # sys.stdout.flush() y = net.initialZeroState() hebb = net.initialZeroHebb() optimizer.zero_grad() inputs, target = generateInputsAndTarget(params) # Running the episode for numstep in range(params['nbsteps']): y, hebb = net(Variable(inputs[numstep], requires_grad=False), y, hebb) # Computing gradients, applying optimizer loss = (y[0][:params['patternsize']] - Variable(target, requires_grad=False)).pow(2).sum() loss.backward() optimizer.step() lossnum = loss.data[0] total_loss += lossnum # Printing statistics, saving files if (numiter+1) % params['print_every'] == 0: print(numiter, "====") td = target.cpu().numpy() yd = y.data.cpu().numpy()[0][:-1] print("y: ", yd[:10]) print("target: ", td[:10]) #print("target: ", target.unsqueeze(0)[0][:10]) absdiff = np.abs(td-yd) print("Mean / median / max abs diff:", np.mean(absdiff), np.median(absdiff), np.max(absdiff)) print("Correlation (full / sign): ", np.corrcoef(td, yd)[0][1], np.corrcoef(np.sign(td), np.sign(yd))[0][1]) #print inputs[numstep] previoustime = nowtime nowtime = time.time() print("Time spent on last", params['print_every'], "iters: ", nowtime - previoustime) total_loss /= params['print_every'] all_losses.append(total_loss) print("Mean loss over last", params['print_every'], "iters:", total_loss) print("Saving local files...") sys.stdout.flush() with open('results_'+suffix+'.dat', 'wb') as fo: pickle.dump(net.w.data.cpu().numpy(), fo) pickle.dump(net.alpha.data.cpu().numpy(), fo) 
pickle.dump(net.eta.data.cpu().numpy(), fo) pickle.dump(all_losses, fo) pickle.dump(params, fo) print("ETA:", net.eta.data.cpu().numpy()) with open('loss_'+suffix+'.txt', 'w') as thefile: for item in all_losses: thefile.write("%s\n" % item) # Uber-only #print("Saving HDFS files...") #if checkHdfs(): # print("Transfering to HDFS...") # transferFileToHdfsDir('results_'+suffix+'.dat', '/ailabs/tmiconi/exp/') # transferFileToHdfsDir('loss_'+suffix+'.txt', '/ailabs/tmiconi/exp/') sys.stdout.flush() sys.stderr.flush() total_loss = 0 @click.command() @click.option('--nbpatterns', default=defaultParams['nbpatterns']) @click.option('--nbprescycles', default=defaultParams['nbprescycles']) @click.option('--homogenous', default=defaultParams['prestime']) @click.option('--prestime', default=defaultParams['prestime']) @click.option('--prestimetest', default=defaultParams['prestimetest']) @click.option('--interpresdelay', default=defaultParams['interpresdelay']) @click.option('--patternsize', default=defaultParams['patternsize']) @click.option('--nbiter', default=defaultParams['nbiter']) @click.option('--probadegrade', default=defaultParams['probadegrade']) @click.option('--lr', default=defaultParams['lr']) @click.option('--print_every', default=defaultParams['print_every']) @click.option('--rngseed', default=defaultParams['rngseed']) def main(nbpatterns, nbprescycles, homogenous, prestime, prestimetest, interpresdelay, patternsize, nbiter, probadegrade, lr, print_every, rngseed): train(paramdict=dict(click.get_current_context().params)) #print(dict(click.get_current_context().params)) if __name__ == "__main__": #train() main() ================================================ FILE: images/plotresults.py ================================================ import numpy as np import glob import matplotlib.pyplot as plt fnames = glob.glob('./tmp/loss_simple_*.txt') #fnames = glob.glob('./tmp/loss_api_*.txt') #fnames = glob.glob('./tmp/loss_fixed_*.txt') plt.ion() 
plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts fulllosses=[] losses=[] lgts=[] for fn in fnames: z = np.loadtxt(fn) lgts.append(len(z)) fulllosses.append(z) minlen = min(lgts) for z in fulllosses: losses.append(z[:minlen]) losses = np.array(losses) meanl = np.mean(losses, axis=0) stdl = np.std(losses, axis=0) highl = np.max(losses, axis=0) lowl = np.min(losses, axis=0) #highl = meanl+stdl #lowl = meanl-stdl xx = range(len(meanl)) # xticks and labels xt = range(0, len(meanl), 50) xtl = [str(10*i) for i in xt] plt.fill_between(xx, lowl, highl, color='blue', alpha=.5) plt.plot(meanl, color='blue') #plt.xlabel('Loss (sum square diff. b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Loss') plt.xticks(xt, xtl) plt.tight_layout() ================================================ FILE: images/request.json ================================================ { "dockerImage":"test_tm", "tag":"master-test-2017_10_31_17_22_28", "name":"PicsAPIToCompareWithFixed", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 pics_api.py --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":1000, "cluster":"opusprodda1", "environment":"devel", "user":"root", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":3, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: images/showcompletion_eta.py ================================================ # Old code to show the dynamics of pattern completion : 
show the product of the network at each time step # Useful to understand how the network works (i.e. the need to clear up remnant activity from previous stimuli) # May require adjustments to work (e.g. change file names, etc.) # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F import scipy import scipy.misc from torch import optim import random import sys import pickle import pdb import time np.set_printoptions(precision=3) import matplotlib.pyplot as plt plt.ion() import images as pics from images import Network #plt.figure() # Note that this is a different file from the ones used in training with open('../data_batch_5', 'rb') as fo: imagedict = pickle.load(fo, encoding='bytes') imagedata = imagedict[b'data'] #suffix = 'eta_prestime_20_probadegrade_0.5_interpresdelay_2_learningrate_0.0001_prestimetest_3_rngseed_0_nbiter_50000_nbprescycles_3_inputboost_1.0_eta_0.01_nbpatterns_3_patternsize_1024' # This one used for first draft of the paper, rngseed 4 #suffix = 'eta_inputboost_1.0_learningrate_0.0001_nbprescycles_3_interpresdelay_2_eta_0.01_rngseed_0_probadegrade_0.5_nbiter_150000_nbpatterns_3_prestimetest_3_patternsize_1024_prestime_20' 
#suffix="eta_nbpatterns_3_inputboost_1.0_nbprescycles_3_prestime_20_prestimetest_5_interpresdelay_2_patternsize_1024_nbiter_50000_probadegrade_0.5_learningrate_0.0001_eta_0.01_rngseed_0" suffix='etarefiner_eta_0.01_nbpatterns_3_interpresdelay_2_patternsize_1024_prestime_20_learningrate_1e-05_nbprescycles_3_rngseed_0_prestimetest_3_probadegrade_0.5_inputboost_1.0_nbiter_150000' #fn = './tmp/results_'+suffix+'.dat' fn = './results_'+suffix+'.dat' with open(fn, 'rb') as fo: myw = pickle.load(fo) myalpha = pickle.load(fo) myeta = pickle.load(fo) myall_losses = pickle.load(fo) myparams = pickle.load(fo) net = Network(myparams) #np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #rngseed=4 rngseed=7 np.random.seed(rngseed); random.seed(rngseed); torch.manual_seed(rngseed) #print myall_losses ttype = torch.cuda.FloatTensor # Must match the one in pics_eta.py #ttype = torch.FloatTensor # Must match the one in pics_eta.py net.w.data = torch.from_numpy(myw).type(ttype) net.alpha.data = torch.from_numpy(myalpha).type(ttype) net.eta.data = torch.from_numpy(myeta).type(ttype) print(net.w.data[:10,:10]) print(net.eta.data) NBPICS = 10 nn=1 imagesize = int(np.sqrt(myparams['patternsize'])) outputs={} plt.figure() FILLINGSTEPS = myparams['prestimetest'] + myparams['interpresdelay'] + 1 for numpic in range(NBPICS): print("Pattern", numpic) z = np.random.rand() z = np.random.rand() inputsTensor, targetPattern = pics.generateInputsAndTarget(myparams, contiguousperturbation=True) y = net.initialZeroState() hebb = net.initialZeroHebb() net.zeroDiagAlpha() for numstep in range(myparams['nbsteps']): y, hebb = net(Variable(inputsTensor[numstep], requires_grad=False), y, hebb) if numstep >= myparams['nbsteps'] - FILLINGSTEPS: output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize)) #output = scipy.misc.imresize(output, 4.0) plt.subplot(NBPICS, FILLINGSTEPS, nn) plt.axis('off') plt.imshow(output, cmap='gray', vmin=-1.0, 
vmax=1.0) nn += 1 #scipy.misc.imsave('pic'+str(numpic)+'_'+str(numstep)+'.png', output) plt.show(block=True) # All images could be rotated 90deg. This allows us to display each set as a # vertical column by rotating the final image 90 degrees too. #output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize)) #pattern1 = inputsTensor.cpu().numpy()[0][0][:-1].reshape((imagesize, imagesize)) #pattern2 = inputsTensor.cpu().numpy()[myparams['prestime']+myparams['interpresdelay']+1][0][:-1].reshape((imagesize, imagesize)) #pattern3 = inputsTensor.cpu().numpy()[2*(myparams['prestime']+myparams['interpresdelay'])+1][0][:-1].reshape((imagesize, imagesize)) #blankedpattern = inputsTensor.cpu().numpy()[-1][0][:-1].reshape((imagesize, imagesize)) #plt.subplot(NBPICS,5,nn) #plt.axis('off') #plt.imshow(pattern1, cmap='gray', vmin=-1.0, vmax=1.0) #plt.subplot(NBPICS,5,nn+1) #plt.axis('off') #plt.imshow(pattern2, cmap='gray', vmin=-1.0, vmax=1.0) #plt.subplot(NBPICS,5,nn+2) #plt.axis('off') #plt.imshow(pattern3, cmap='gray', vmin=-1.0, vmax=1.0) #plt.subplot(NBPICS,5,nn+3) #plt.axis('off') #plt.imshow(blankedpattern, cmap='gray', vmin=-1.0, vmax=1.0) #plt.subplot(NBPICS,5,nn+4) #plt.imshow(output, cmap='gray', vmin=-1.0, vmax=1.0) #plt.axis('off') #nn += 5 #td = targetPattern.cpu().numpy() #yd = y.data.cpu().numpy()[0][:-1] #absdiff = np.abs(td-yd) #print("Mean / median / max abs diff:", np.mean(absdiff), np.median(absdiff), np.max(absdiff)) #print("Correlation (full / sign): ", np.corrcoef(td, yd)[0][1], np.corrcoef(np.sign(td), np.sign(yd))[0][1]) ##print inputs[numstep] #plt.subplots_adjust(wspace=.1, hspace=.1) ================================================ FILE: images/testpics.py ================================================ # Generate a figure that shows a number of episodes # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
import random
import sys
import pickle
import pdb
import time
np.set_printoptions(precision=3)
import matplotlib.pyplot as plt
plt.ion()

import images as pics
from images import Network

plt.figure()

# Note that this is a different file from the ones used in training
# NOTE(review): looks like a CIFAR-style pickled batch (b'data' key) — confirm.
with open('./data_batch_5', 'rb') as fo:
    imagedict = pickle.load(fo, encoding='bytes')
imagedata = imagedict[b'data']

# Suffix identifying the training run whose saved results are loaded below;
# must match the file name produced by the training script for these parameters.
suffix='images_patternsize_1024_interpresdelay_2_nbpatterns_3_lr_0.0001_nbprescycles_3_homogenous_20_nbiter_100000_prestime_20_probadegrade_0.5_prestimetest_3_rngseed_0'
#fn = './tmp/results_'+suffix+'.dat'
fn = './results_'+suffix+'.dat'

# The results file is written by the trainer in this exact pickle order:
# w, alpha, eta, all_losses, params.
with open(fn, 'rb') as fo:
    myw = pickle.load(fo)
    myalpha = pickle.load(fo)
    myeta = pickle.load(fo)
    myall_losses = pickle.load(fo)
    myparams = pickle.load(fo)

net = Network(myparams)
#np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])
#rngseed=4
rngseed=4
np.random.seed(rngseed); random.seed(rngseed); torch.manual_seed(rngseed)
#print myall_losses
ttype = torch.cuda.FloatTensor # Must match the one in pics_eta.py
#ttype = torch.FloatTensor # Must match the one in pics_eta.py

# Overwrite the freshly-initialized parameters with the trained values.
net.w.data = torch.from_numpy(myw).type(ttype)
net.alpha.data = torch.from_numpy(myalpha).type(ttype)
net.eta.data = torch.from_numpy(myeta).type(ttype)
print(net.w.data[:10,:10])
print(net.eta.data)

NBPICS = 7
# NOTE(review): `nn` shadows the `torch.nn` import above; harmless here since
# torch.nn is not referenced afterwards, but worth renaming in a future edit.
nn=1
for numpic in range(NBPICS):
    print("Pattern", numpic)
    inputsTensor, targetPattern = pics.generateInputsAndTarget(myparams, contiguousperturbation=True)
    y = net.initialZeroState()
    hebb = net.initialZeroHebb()
    #net.zeroDiagAlpha()
    # Run one full episode; only the final state y is visualized below.
    for numstep in range(myparams['nbsteps']):
        y, hebb = net(Variable(inputsTensor[numstep], requires_grad=False), y, hebb)

    # All images could be rotated 90deg. This allows us to display each set as a
    # vertical column by rotating the final image 90 degrees too.
    imagesize = int(np.sqrt(myparams['patternsize']))
    # The last element of the state vector is dropped ([:-1]) — presumably a
    # bias/extra unit beyond patternsize; TODO confirm against Network.
    output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize))
    pattern1 = inputsTensor.cpu().numpy()[0][0][:-1].reshape((imagesize, imagesize))
    pattern2 = inputsTensor.cpu().numpy()[myparams['prestime']+myparams['interpresdelay']+1][0][:-1].reshape((imagesize, imagesize))
    pattern3 = inputsTensor.cpu().numpy()[2*(myparams['prestime']+myparams['interpresdelay'])+1][0][:-1].reshape((imagesize, imagesize))
    blankedpattern = inputsTensor.cpu().numpy()[-1][0][:-1].reshape((imagesize, imagesize))
    #output = y.data.cpu().numpy()[0][:-1].reshape((imagesize, imagesize)).T
    #pattern1 = inputsTensor.cpu().numpy()[0][0][:-1].reshape((imagesize, imagesize)).T
    #pattern2 = inputsTensor.cpu().numpy()[myparams['prestime']+myparams['interpresdelay']+1][0][:-1].reshape((imagesize, imagesize)).T
    #pattern3 = inputsTensor.cpu().numpy()[2*(myparams['prestime']+myparams['interpresdelay'])+1][0][:-1].reshape((imagesize, imagesize)).T
    #blankedpattern = inputsTensor.cpu().numpy()[-1][0][:-1].reshape((imagesize, imagesize)).T

    # One row of 5 panels per episode: the three presented patterns, the
    # blanked/degraded test cue (last input frame), and the network's output.
    plt.subplot(NBPICS,5,nn)
    plt.axis('off')
    plt.imshow(pattern1, cmap='gray', vmin=-1.0, vmax=1.0)
    plt.subplot(NBPICS,5,nn+1)
    plt.axis('off')
    plt.imshow(pattern2, cmap='gray', vmin=-1.0, vmax=1.0)
    plt.subplot(NBPICS,5,nn+2)
    plt.axis('off')
    plt.imshow(pattern3, cmap='gray', vmin=-1.0, vmax=1.0)
    plt.subplot(NBPICS,5,nn+3)
    plt.axis('off')
    plt.imshow(blankedpattern, cmap='gray', vmin=-1.0, vmax=1.0)
    plt.subplot(NBPICS,5,nn+4)
    plt.imshow(output, cmap='gray', vmin=-1.0, vmax=1.0)
    plt.axis('off')
    nn += 5

    # Reconstruction-quality statistics for this episode.
    td = targetPattern.cpu().numpy()
    yd = y.data.cpu().numpy()[0][:-1]
    absdiff = np.abs(td-yd)
    print("Mean / median / max abs diff:", np.mean(absdiff), np.median(absdiff), np.max(absdiff))
    print("Correlation (full / sign): ", np.corrcoef(td, yd)[0][1], np.corrcoef(np.sign(td), np.sign(yd))[0][1])
    #print inputs[numstep]

plt.subplots_adjust(wspace=.1, hspace=.1)


================================================
FILE: maze/OpusHdfsCopy.py
================================================

# Uber-only code for interacting with hdfs
#
# Copyright (c) 2018 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os import os.path def checkHdfs(): return os.path.isfile('/opt/hadoop/latest/bin/hdfs') def transferFileToHdfsPath(sourcepath, targetpath): hdfspath = targetpath targetdir = os.path.dirname(targetpath) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) def transferFileToHdfsDir(sourcepath, targetdir): hdfspath = os.path.join(targetdir, os.path.basename(sourcepath)) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) ================================================ FILE: maze/README.md ================================================ # Maze task This code performs the grid-maze task, in which the agent must locate a reward and then navigate back to it repeatedly (while being randomly relocated each time it finds it). # Episode 1: ![Animation](AnimBad.gif "Agent at episode 1") # Episode 300000: ![Animation](AnimGood.gif "Agent at episode 300000") ======= # Grid Maze task The agent's task is to hit the (invisible) reward location as many times as possible within a fixed number time steps. Because the reward location is randomized at the start of each episode, and the agent is randomly teleported every time it hits the reward, the agent must discover and memorize the reward location for each episode. The agent's only inputs consist of a 3x3 neighborhood around the agent's location, as well as the reward obtained (if any) and the action chosen at the previous time step. The outer-loop metal-learning algorithm is Advantage Actor critic. 
All within-episode learning occurs through the self-modulated plasticity of network connections.

For a simpler (but less flexible) implementation of the same task, see the `simplemaze` directory in this repo.

## Visualizations of agent behavior

We show the behavior of the agent over two successive episodes, after 0 and 200,000 meta-learning iterations. The reward location is indicated only for visualization purposes: it is invisible to the agent.

### Episode 0

![Episode 0](anim0_maze.gif)

### Episode 200,000

![Episode 200,000](anim200K_maze.gif)

## Usage

`python3 batch.py --eplen 200 --hs 100 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type modplast --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed 0`

`eplen` is the length of an episode, `hs` is the hidden/recurrent layer size, `bs` is batch size and `gc` is gradient clipping. `type` can be "modplast" (simple neuromodulation), "modul" (retroactive modulation), "plastic" (non-modulated plasticity) or "rnn" (no plasticity at all, plain rnn).
================================================ FILE: maze/anim.py ================================================ # python anim.py --nbiter 1000000 --rule oja --squash 0 --hiddensize 200 --lr 1e-4 --eplen 250 --print_every 200 --save_every 1000 --bentropy 0.1 --blossv .03 --randstart 1 --gr .9 --rp 0 --labsize 11 --rngseed 1 --type plastic import argparse import pdb import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import time import os import OpusHdfsCopy from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs import platform import gridlab from gridlab import Network import numpy as np import matplotlib.pyplot as plt import matplotlib.animation as animation import glob np.set_printoptions(precision=4) ETA = .02 # Not used ADDINPUT = 4 # 1 input for the previous reward, 1 input for numstep, 1 for whether currently on reward square, 1 "Bias" input NBACTIONS = 4 # U, D, L, R RFSIZE = 3 # Receptive Field TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS fig = plt.figure() plt.axis('off') def train(paramdict): #params = dict(click.get_current_context().params) print("Starting training...") params = {} #params.update(defaultParams) params.update(paramdict) print("Passed params: ", params) print(platform.uname()) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode # This needs to be the same as in the file generated by gridlab, and thus the command line parameters must be identical suffix = "grid_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + 
"_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames params['rngseed'] = 3 # Initialize random seeds (first two redundant?) print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) net = Network(params) net.load_state_dict(torch.load('./tmpWorked/torchmodel_'+suffix + '.txt')) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4) #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr']) #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, params['gamma']) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) LABSIZE = params['labsize'] lab = np.ones((LABSIZE, LABSIZE)) CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) for row in range(1, LABSIZE - 1): for col in range(1, LABSIZE - 1): if row % 2 == 0 and col % 2 == 0: lab[row, col] = 1 lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps loclization by introducing a detectable irregularity in the center all_losses = [] all_losses_objective = [] all_losses_eval = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() print("Starting episodes...") sys.stdout.flush() pos = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() # Starting episodes! 
params['nbiter'] = 1 for numiter in range(params['nbiter']): PRINTTRACE = 0 if (numiter+1) % (1 + params['print_every']) == 0: PRINTTRACE = 1 ## Where is the reward square for this episode? #rnd = np.random.randint(0,4) ##if rnd == 0: ## rposr = 1; rposc = CTR ##elif rnd == 1: ## rposr = CTR; rposc = 1 ##elif rnd == 2: ## rposr = CTR; rposc = LABSIZE - 2 ##elif rnd == 3: ## rposr = LABSIZE - 2; rposc = CTR #if rnd == 0: # rposr = 1; rposc = 1 #elif rnd == 1: # rposr = LABSIZE - 2; rposc = 1 #elif rnd == 2: # rposr = 1; rposc = LABSIZE - 2 #elif rnd == 3: # rposr = LABSIZE - 2; rposc = LABSIZE - 2 # Note: it doesn't matter if the reward is on the center (see below). All we need is not to put it on a wall or pillar (lab=1) rposr = 0; rposc = 0 if params['rp'] == 0: while lab[rposr, rposc] == 1: rposr = np.random.randint(1, LABSIZE - 1) rposc = np.random.randint(1, LABSIZE - 1) elif params['rp'] == 1: while lab[rposr, rposc] == 1 or (rposr != 1 and rposr != LABSIZE -2 and rposc != 1 and rposc != LABSIZE-2): rposr = np.random.randint(1, LABSIZE - 1) rposc = np.random.randint(1, LABSIZE - 1) #print("Reward pos:", rposr, rposc) # Agent always starts an episode from the center posc = CTR posr = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() reward = 0.0 rewards = [] vs = [] logprobs = [] sumreward = 0.0 dist = 0 #params['print_every'] = 10 print("==========") print("==========") ax_imgs = [] for numstep in range(params['eplen']): inputsN = np.zeros((1, TOTALNBINPUTS), dtype='float32') inputsN[0, 0:RFSIZE * RFSIZE] = lab[posr - RFSIZE//2:posr + RFSIZE//2 +1, posc - RFSIZE //2:posc + RFSIZE//2 +1].flatten() inputs = torch.from_numpy(inputsN).cuda() # Previous chosen action #inputs[0][numactionchosen] = 1 inputs[0][-1] = 1 # Bias neuron inputs[0][-2] = numstep inputs[0][-3] = reward #if rposr == posr and rposc = posc: # inputs[0][-4] = 1 #else: # inputs[0][-4] = 0 # Running the network y, v, hidden, hebb = 
net(Variable(inputs, requires_grad=False), hidden, hebb) # y should output probabilities distrib = torch.distributions.Categorical(y) actionchosen = distrib.sample() # sample() returns a Pytorch tensor of size 1; this is needed for the backprop below numactionchosen = actionchosen.data[0] # Turn to scalar tgtposc = posc tgtposr = posr if numactionchosen == 0: # Up tgtposr -= 1 elif numactionchosen == 1: # Down tgtposr += 1 elif numactionchosen == 2: # Left tgtposc -= 1 elif numactionchosen == 3: # Right tgtposc += 1 else: raise ValueError("Wrong Action") reward = 0.0 if lab[tgtposr][tgtposc] == 1: reward = -.1 else: dist += 1 posc = tgtposc posr = tgtposr # Display the labyrinth for numr in range(LABSIZE): s = "" for numc in range(LABSIZE): if posr == numr and posc == numc: s += "o" elif rposr == numr and rposc == numc: s += "X" elif lab[numr, numc] == 1: s += "#" else: s += " " print(s) print("") print("") labg = lab.copy() labg[rposr, rposc] = 2 labg[posr, posc] = 3 fullimg = plt.imshow(labg, animated=True) ax_imgs.append([fullimg]) # Did we hit the reward location ? Increase reward and teleport! # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move if rposr == posr and rposc == posc: reward += 10 if params['randstart'] == 1: posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) while lab[posr, posc] == 1: posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) else: posr = CTR posc = CTR rewards.append(reward) vs.append(v) sumreward += reward #loss -= distrib.log_prob(actionchosen) # * reward logprobs.append(distrib.log_prob(actionchosen)) loss += params['bentropy'] * y.pow(2).sum() # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution. Note: .2 may be too strong, .04 may be too weak. 
#if PRINTTRACE: # print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward) R = 0 gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : R = gammaR * R + rewards[numstepb] lossv += (vs[numstepb][0] - R).pow(2) loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows if True: #PRINTTRACE: print("lossv: ", lossv.data.cpu().numpy()[0]) print ("Total reward for this episode:", sumreward, "Dist:", dist) if params['squash'] == 1: if sumreward < 0: sumreward = -np.sqrt(-sumreward) else: sumreward = np.sqrt(sumreward) elif params['squash'] == 0: pass else: raise ValueError("Incorrect value for squash parameter") #loss *= sumreward loss += params['blossv'] * lossv loss /= params['eplen'] #loss.backward() ##for p in net.parameters(): ## p.grad.data.clamp_(-params['clamp'], params['clamp']) #scheduler.step() #optimizer.step() #torch.cuda.empty_cache() lossnum = loss.data[0] lossbetweensaves += lossnum if (numiter + 1) % 10 == 0: all_losses_objective.append(lossnum) all_losses_eval.append(sumreward) all_losses_v.append(lossv.data[0]) #total_loss += lossnuma anim = animation.ArtistAnimation(fig, ax_imgs, interval=200) anim.save('anim.gif', writer='imagemagick', fps=10) if (numiter+1) % params['print_every'] == 0: print(numiter, "====") print("Mean loss: ", lossbetweensaves / params['print_every']) lossbetweensaves = 0 previoustime = nowtime nowtime = time.time() print("Time spent on last", params['print_every'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", net.eta.data.cpu().numpy(), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numiter+1) % params['save_every'] == 0: print("Saving files...") # lossbetweensaves /= 
params['save_every'] # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) # print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # # Instability detection; necessary for SELUs, which seem to be divergence-prone # # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us. # if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) # else: print("Saving local files...") # with open('results_'+suffix+'.dat', 'wb') as fo: # pickle.dump(net.w.data.cpu().numpy(), fo) # pickle.dump(net.alpha.data.cpu().numpy(), fo) # pickle.dump(net.eta.data.cpu().numpy(), fo) # pickle.dump(all_losses, fo) # pickle.dump(params, fo) #with open('loss_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_objective: # thefile.write("%s\n" % item) #with open('lossv_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_v: # thefile.write("%s\n" % item) #with open('loss_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_eval: # thefile.write("%s\n" % item) #torch.save(net.state_dict(), 'torchmodel_'+suffix+'.txt') #print("Saving HDFS files...") #if checkHdfs(): # print("Transfering to HDFS...") # #transferFileToHdfsDir('results_'+suffix+'.dat', '/ailabs/tmiconi/omniglot/') # transferFileToHdfsDir('loss_'+suffix+'.txt', '/ailabs/tmiconi/gridlab/') # transferFileToHdfsDir('torchmodel_'+suffix+'.txt', '/ailabs/tmiconi/omniglot/') #print("Saved!") # lossbetweensavesprev = lossbetweensaves # lossbetweensaves = 0 # sys.stdout.flush() # sys.stderr.flush() if __name__ == "__main__": #defaultParams = { # 'type' : 'lstm', # 'seqlen' : 200, # 'hiddensize': 500, # 'activ': 'tanh', # 'steplr': 10e9, # By default, no change in the 
learning rate # 'gamma': .5, # The annealing factor of learning rate decay for Adam # 'imagesize': 31, # 'nbiter': 30000, # 'lr': 1e-4, # 'test_every': 10, # 'save_every': 3000, # 'rngseed':0 #} parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) parser.add_argument("--bentropy", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.1) parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1) parser.add_argument("--labsize", type=int, help="size of the labyrinth; must be odd", default=7) parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=0) parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0) parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0) #parser.add_argument("--nbarms", type=int, help="number of arms", default=2) #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3) parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh') parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb') parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='rnn') parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.99) parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4) parser.add_argument("--eplen", type=int, help="length of episodes", default=100) parser.add_argument("--hiddensize", type=int, help="size of the recurrent (hidden) layer", default=100) 
#parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000) parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=0) parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=200) parser.add_argument("--print_every", type=int, help="number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: maze/animbatch.py ================================================ # This code produces animations showing the behavior of an agent for two successive episodes. # Usage: python animbatch.py --file FILENAME [--initialize [0/1]] # FILENAME should be the params_XXX.dat produced by the meta-learning process # (batch.py). Make sure that the torchmodel_xxx.dat file is in the same location. # Optional argument initialize should be set to 1 if you want to ignore the # trained parameters and reinitialize the network, equivalent to obtaining the # "generation-0" network. 
import argparse
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random
import sys
import pickle
import time
import os
import OpusHdfsCopy
from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs
import platform
import batch
from batch import Network
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import glob

np.set_printoptions(precision=4)

ETA = .02               # Not used
ADDINPUT = 4            # 1 input for the previous reward, 1 input for numstep, 1 for whether currently on reward square, 1 "Bias" input
NBACTIONS = 4           # U, D, L, R
RFSIZE = 3              # Receptive Field
TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS

# Module-level figure that accumulates one imshow frame per timestep; the
# animation is assembled from these frames at the end of train().
fig = plt.figure()
plt.axis('off')


def train(paramdict):
    """Replay a trained maze agent and save its behavior as anim.gif.

    Despite the name, no learning happens here: the meta-trained parameters
    saved by batch.py are loaded (unless paramdict['initialize'] != 0) and the
    agent is run for 3 episodes while every step is rendered into a
    matplotlib ArtistAnimation.

    paramdict must contain:
      'file'       -- path to the params_XXX.dat pickle written by batch.py
      'initialize' -- 0 to load the matching torchmodel_XXX.dat, anything
                      else to keep the freshly initialized ("generation-0")
                      network.
    """
    # The pickled params dict fully describes the meta-training run; the
    # filename suffix is reconstructed from it to locate the model file.
    fname = paramdict['file']
    with open(fname, 'rb') as f:
        params = pickle.load(f)
    print("Passed params: ", params)
    print(platform.uname())
    # Turning the parameters into a nice suffix for filenames (must match the
    # suffix-building logic in batch.py exactly, or the model won't be found).
    suffix = "btchFixmod_"+"".join([str(x)+"_" if pair[0] != 'nbsteps' and pair[0] != 'rngseed' and pair[0] != 'save_every' and pair[0] != 'test_every' and pair[0] != 'pe' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed'])
    print("Reconstructed suffix:", suffix)

    params['rsp'] = 1
    #params['rngseed'] = 3

    # Initialize random seeds (first two redundant?)
    print("Setting random seeds")
    np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])

    net = Network(params)
    # YOU MAY NEED TO CHANGE THE DIRECTORY HERE:
    # NOTE(review): --initialize is parsed without type=int, so a value given
    # on the command line arrives as a *string*; "0" == 0 is False, meaning an
    # explicit `--initialize 0` would silently SKIP loading the model. Only
    # the int default of 0 triggers the load. Consider adding type=int to the
    # argument definition.
    if paramdict['initialize'] == 0:
        net.load_state_dict(torch.load('./tmp/torchmodel_'+suffix + '.dat'))
    print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()])
    allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()]
    print ("Size (numel) of all optimized elements:", allsizes)
    print ("Total size (numel) of all optimized elements:", sum(allsizes))

    BATCHSIZE = params['bs']
    LABSIZE = params['msize']
    lab = np.ones((LABSIZE, LABSIZE))   # 1 = wall/pillar, 0 = free square
    CTR = LABSIZE // 2

    # Grid maze: open the interior, then plant a pillar on every (even, even)
    # cell, producing the regular grid of obstacles.
    lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0)
    for row in range(1, LABSIZE - 1):
        for col in range(1, LABSIZE - 1):
            if row % 2 == 0 and col % 2 == 0:
                lab[row, col] = 1
    # Not strictly necessary, but cleaner since we start the agent at the
    # center for each episode; may help localization in some maze sizes
    # (including 13 and 9, but not 11) by introducing a detectable irregularity
    # in the center:
    lab[CTR,CTR] = 0

    all_losses = []
    all_grad_norms = []
    all_losses_objective = []
    all_total_rewards = []
    all_losses_v = []
    lossbetweensaves = 0
    nowtime = time.time()

    meanrewards = np.zeros((LABSIZE, LABSIZE))
    meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen']))

    pos = 0
    hidden = net.initialZeroState()
    hebb = net.initialZeroHebb()
    pw = net.initialZeroPlasticWeights()

    # Only 3 episodes for the animation (overrides whatever was meta-trained).
    params['nbiter'] = 3
    ax_imgs = []   # one [AxesImage] per timestep, consumed by ArtistAnimation

    for numiter in range(params['nbiter']):

        PRINTTRACE = 0
        if (numiter+1) % (params['pe']) == 0:
            PRINTTRACE = 1

        # Select the reward location for this episode - not on a wall!
        # And not on the center either! (though not sure how useful that restriction is...)
        # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp'])
        posr = {}; posc = {}
        rposr = {}; rposc = {}
        for nb in range(BATCHSIZE):
            # Note: it doesn't matter if the reward is on the center (see below). All we need is not to put it on a wall or pillar (lab=1)
            myrposr = 0; myrposc = 0
            while lab[myrposr, myrposc] == 1 or (myrposr == CTR and myrposc == CTR):
                myrposr = np.random.randint(1, LABSIZE - 1)
                myrposc = np.random.randint(1, LABSIZE - 1)
            rposr[nb] = myrposr; rposc[nb] = myrposc
            # Agent always starts an episode from the center
            posc[nb] = CTR
            posr[nb] = CTR

        # Per-episode state: loss accumulators and fresh recurrent /
        # Hebbian / eligibility / plastic-weight tensors.
        loss = 0
        lossv = 0
        hidden = net.initialZeroState()
        hebb = net.initialZeroHebb()
        et = net.initialZeroHebb()   # Eligibility Trace is identical to Hebbian Trace in shape
        pw = net.initialZeroPlasticWeights()
        numactionchosen = 0

        reward = np.zeros(BATCHSIZE)
        sumreward = np.zeros(BATCHSIZE)
        rewards = []
        vs = []
        logprobs = []
        dist = 0
        numactionschosen = np.zeros(BATCHSIZE, dtype='int32')

        for numstep in range(params['eplen']):

            # Build the input vector for each batch element:
            # [RFSIZE*RFSIZE local view | one-hot prev action slot offset |
            #  bias | time-in-episode | prev reward | one-hot prev action]
            inputs = np.zeros((BATCHSIZE, TOTALNBINPUTS), dtype='float32')
            labg = lab.copy()
            for nb in range(BATCHSIZE):
                # The agent sees the maze cells within its receptive field
                inputs[nb, 0:RFSIZE * RFSIZE] = labg[posr[nb] - RFSIZE//2:posr[nb] + RFSIZE//2 +1, posc[nb] - RFSIZE //2:posc[nb] + RFSIZE//2 +1].flatten() * 1.0
                inputs[nb, RFSIZE * RFSIZE +1] = 1.0                        # Bias neuron
                inputs[nb, RFSIZE * RFSIZE +2] = numstep / params['eplen']  # time signal
                inputs[nb, RFSIZE * RFSIZE +3] = 1.0 * reward[nb]           # Reward from previous time step
                inputs[nb, RFSIZE * RFSIZE + ADDINPUT + numactionschosen[nb]] = 1  # previous action, one-hot

            inputsC = torch.from_numpy(inputs).cuda()

            ## Running the network
            y, v, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw)  # y should output raw scores, not probas

            # Convert raw scores to probabilities and sample one action per
            # batch element (sampling, not argmax, preserves exploration).
            y = F.softmax(y, dim=1)
            distrib = torch.distributions.Categorical(y)
            actionschosen = distrib.sample()
            logprobs.append(distrib.log_prob(actionschosen))
            numactionschosen = actionschosen.data.cpu().numpy()  # Turn to scalar

            reward = np.zeros(BATCHSIZE, dtype='float32')
            for nb in range(BATCHSIZE):
                myreward = 0
                numactionchosen = numactionschosen[nb]

                tgtposc = posc[nb]
                tgtposr = posr[nb]
                if numactionchosen == 0:    # Up
                    tgtposr -= 1
                elif numactionchosen == 1:  # Down
                    tgtposr += 1
                elif numactionchosen == 2:  # Left
                    tgtposc -= 1
                elif numactionchosen == 3:  # Right
                    tgtposc += 1
                else:
                    raise ValueError("Wrong Action")

                reward[nb] = 0.0  # The reward for this step
                if lab[tgtposr][tgtposc] == 1:
                    # Bumping into a wall: stay put and pay the wall penalty
                    reward[nb] -= params['wp']
                else:
                    posc[nb] = tgtposc
                    posr[nb] = tgtposr

                # Did we hit the reward location ? Increase reward and teleport!
                # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move
                if rposr[nb] == posr[nb] and rposc[nb] == posc[nb]:
                    reward[nb] += params['rew']
                    posr[nb]= np.random.randint(1, LABSIZE - 1)
                    posc[nb] = np.random.randint(1, LABSIZE - 1)
                    while lab[posr[nb], posc[nb]] == 1 or (rposr[nb] == posr[nb] and rposc[nb] == posc[nb]):
                        posr[nb] = np.random.randint(1, LABSIZE - 1)
                        posc[nb] = np.random.randint(1, LABSIZE - 1)

            rewards.append(reward)
            vs.append(v)
            sumreward += reward

            # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution. Note: .2 may be too strong, .04 may be too weak.
            loss += ( params['bent'] * y.pow(2).sum() / BATCHSIZE )

            if PRINTTRACE:
                print("Step ", numstep, " Inputs (to 1st in batch): ", inputs[0, :TOTALNBINPUTS], " - Outputs(1st in batch): ", y[0].data.cpu().numpy(), " - action chosen(1st in batch): ", numactionschosen[0],
                        " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Reward (this step, 1st in batch): ", reward[0])

            # Render this timestep for batch element 0: reward square = 2,
            # agent square = 3, walls = 1, free = 0.
            labg = lab.copy()
            labg[rposr[0], rposc[0]] = 2
            labg[posr[0], posc[0]] = 3
            fullimg = plt.imshow(labg, animated=True)
            ax_imgs.append([fullimg])

        # Episode is done, now let's do the actual computations (A2C-style
        # discounted-return / advantage pass; gradients are computed for the
        # loss bookkeeping but no optimizer step is taken in this script).
        R = Variable(torch.zeros(BATCHSIZE).cuda(), requires_grad=False)
        gammaR = params['gr']
        for numstepb in reversed(range(params['eplen'])) :
            R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False)
            # NOTE(review): vs[numstepb] has one value per batch element;
            # indexing [0] looks like it uses only batch element 0's value
            # prediction for the whole batch — presumably [:, 0] was intended.
            # TODO confirm against batch.py (same pattern there).
            ctrR = R - vs[numstepb][0]
            lossv += ctrR.pow(2).sum() / BATCHSIZE
            loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BATCHSIZE   # Need to check if detach() is OK
            #pdb.set_trace()

        # (Commented-out alternative REINFORCE-style algorithms and
        # mean-reward baselines removed from active consideration; see
        # batch.py for the same variants.)

        loss += params['blossv'] * lossv
        loss /= params['eplen']

        if True: #PRINTTRACE:
            if True: #params['algo'] == 'A3C':
                print("lossv: ", float(lossv))
            print ("Total reward for this episode:", sumreward, "Dist:", dist)

        #if numiter > 100:  # Burn-in period for meanrewards
        #    loss.backward()
        #    optimizer.step()
        #torch.cuda.empty_cache()

    print("Saving animation....")
    anim = animation.ArtistAnimation(fig, ax_imgs, interval=200)
    anim.save('anim.gif', writer='imagemagick', fps=10)   # requires ImageMagick on PATH


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="params file")
    # NOTE(review): missing type=int — see the comment in train() about the
    # string/int comparison on 'initialize'.
    parser.add_argument("--initialize", help="should we reinitialize the network (1) or keep the trained network (0)?", default=0)
    args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None }
    train(argdict)


================================================
FILE: maze/batch.py
================================================
# Backpropamine: differentiable neuromdulated plasticity.
#
# Copyright (c) 2018-2019 Uber Technologies, Inc.
#
# Licensed under the Uber Non-Commercial License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at the root directory of this project.
#
# See the License file in this repository for the specific language governing
# permissions and limitations under the License.

#This code implements the "Grid Maze" task. See Section 4.2 in Miconi et al.
#ICLR 2019 ( https://openreview.net/pdf?id=r1lrAiA5Ym ), or Section 4.5 in
#Miconi et al. ICML 2018 ( https://arxiv.org/abs/1804.02464 ).

import argparse
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random
import sys
import pickle
import time
import os
import platform

##import makemaze

import numpy as np
#import matplotlib.pyplot as plt
import glob

np.set_printoptions(precision=4)

NBDA = 1  # Number of different DA output neurons. At present, the code assumes NBDA=1 and will NOT WORK if you change this.

np.set_printoptions(precision=4)

ADDINPUT = 4            # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs
NBACTIONS = 4           # Up, Down, Left, Right
RFSIZE = 3              # Receptive Field
TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS

##ttype = torch.FloatTensor;
#ttype = torch.cuda.FloatTensor;


class Network(nn.Module):
    """Recurrent policy/value network with optional (neuromodulated) plasticity.

    params['type'] selects the architecture:
      'rnn'      -- plain recurrent net with fixed weights w.
      'plastic'  -- fixed w plus a Hebbian trace `hebb` scaled by per-synapse
                    alpha, with a single learned plasticity rate eta.
      'modplast' -- like 'plastic' but the Hebbian update is gated each step
                    by a network-computed neuromodulatory signal (DAout).
      'modul'/'modul2' -- separates a fast eligibility trace `et` (decay rate
                    etaet) from the plastic weights `pw`; DAout gates the
                    incorporation of et into pw (retroactive modulation).

    All submodules are placed on CUDA at construction time.
    """

    def __init__(self, params):
        super(Network, self).__init__()
        #self.rule = params['rule']
        self.type = params['type']
        self.softmax= torch.nn.functional.softmax
        #if params['activ'] == 'tanh':
        self.activ = F.tanh
        if params['type'] == 'rnn':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
        elif params['type'] == 'modplast':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
            self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()   # hidden -> dopamine (modulation) signal
        elif params['type'] == 'plastic' :
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True)  # Everyone has the same eta
        elif params['type'] == 'modul' or params['type'] == 'modul2':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.etaet = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True)  # Everyone has the same etaet
            self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
        else:
            raise ValueError("Which network type?")
        self.h2o = torch.nn.Linear(params['hs'], NBACTIONS).cuda()   # policy head (raw scores)
        self.h2v = torch.nn.Linear(params['hs'], 1).cuda()           # value head
        self.params = params
        # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions
        # Each *column* of w targets a single output neuron

    def forward(self, inputs, hidden, hebb, et, pw):
        """One recurrent step.

        inputs: (BS, TOTALNBINPUTS); hidden: (BS, HS);
        hebb/et/pw: (BS, HS, HS) Hebbian trace, eligibility trace and plastic
        weights (only the tensors relevant to self.type are updated; the
        others are passed through unchanged).

        Returns (activout, valueout, hidden, hebb, et, pw) where activout is
        the raw (pre-softmax) action scores and valueout the value estimate.
        """
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        if self.type == 'rnn':
            hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul(self.w.view(1, HS, HS), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
            hidden = hactiv
            activout = self.h2o(hactiv)  # Linear! To be softmax'ed outside the function
            valueout = self.h2v(hactiv)
            #valueout = 0

        elif self.type == 'plastic':
            # Each row of w and hebb contains the input weights to a single neuron
            # hidden = x, hactiv = y
            hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
            activout = self.h2o(hactiv)  # Pure linear, raw scores - will be softmaxed later
            valueout = self.h2v(hactiv)
            deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS))  # batched outer product...should it be other way round?
            # params['addpw'] selects how the Hebbian trace accumulates:
            if self.params['addpw'] == 3:
                # Note that there is no decay, even in the Hebb-rule case : additive only!
                # Hard clamp
                hebb = torch.clamp( hebb + self.eta * deltahebb, min=-1.0, max=1.0)
            elif self.params['addpw'] == 2:
                # Note that there is no decay, even in the Hebb-rule case : additive only!
                # Soft clamp
                hebb = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0)
            elif self.params['addpw'] == 1:
                # Purely additive, tends to make the meta-learning diverge. No decay/clamp.
                hebb = hebb + self.eta * deltahebb
            elif self.params['addpw'] == 0:
                # We do it the normal way. Note that here, Hebb-rule is decaying.
                # There is probably a way to make it more efficient.
                hebb = (1 - self.eta) * hebb + self.eta * deltahebb
            # NOTE(review): no `else: raise` here — an unexpected addpw value
            # silently leaves hebb unchanged (the other branches do raise).
            hidden = hactiv

        elif self.type == 'modplast':
            #Here we compute the same deltahebb for the whole network, and use
            #the same addpw for the whole network too.
            # The rows of w and hebb are the inputs weights to a single neuron
            # hidden = x, hactiv = y
            hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
            activout = self.h2o(hactiv)  # Pure linear, raw scores - will be softmaxed later
            valueout = self.h2v(hactiv)
            # Now computing the Hebbian updates...
            # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below)
            if self.params['da'] == 'tanh':
                DAout = F.tanh(self.h2DA(hactiv))
            elif self.params['da'] == 'sig':
                DAout = F.sigmoid(self.h2DA(hactiv))
            elif self.params['da'] == 'lin':
                DAout = self.h2DA(hactiv)
            else:
                raise ValueError("Which transformation for DAout ?")
            # deltahebb has shape BS x HS x HS
            # Each row of hebb contain the input weights to a neuron
            deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS))  # batched outer product...should it be other way round?
            if self.params['addpw'] == 3:
                # Hard clamp, purely additive
                # Note that we do the same for Hebb and Oja's rule
                hebb1 = torch.clamp(hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=-1.0, max=1.0)
            elif self.params['addpw'] == 2:
                # Note that there is no decay, even in the Hebb-rule case : additive only!
                hebb1 = torch.clamp( hebb + torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0)
            elif self.params['addpw'] == 1:
                # Purely additive. This will almost certainly diverge, don't use it!
                hebb1 = hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb
            elif self.params['addpw'] == 0:
                # We do it the old way. Note that here, Hebb-rule is decaying.
                # There is probably a way to make it more efficient
                # NOTE: THIS WILL GO AWRY if DAout is allowed to go outside [0,1]!
                # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1
                hebb1 = (1 - DAout.view(BATCHSIZE,1,1)) * hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb
            else:
                raise ValueError("Which additive form for plastic weights?")
            hebb = hebb1
            hidden = hactiv

        elif self.type == 'modul':
            # The rows of w and hebb are the inputs weights to a single neuron
            # hidden = x, hactiv = y
            hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, pw)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
            activout = self.h2o(hactiv)  # Pure linear, raw scores - will be softmaxed later
            valueout = self.h2v(hactiv)
            # Now computing the Hebbian updates...
            # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below)
            if self.params['da'] == 'tanh':
                DAout = F.tanh(self.h2DA(hactiv))
            elif self.params['da'] == 'sig':
                DAout = F.sigmoid(self.h2DA(hactiv))
            elif self.params['da'] == 'lin':
                DAout = self.h2DA(hactiv)
            else:
                raise ValueError("Which transformation for DAout ?")
            # We need to select the order of operations; network update, e.t. update, neuromodulated incorporation into plastic weights
            # One possibility (for now go with this one):
            # - computing all outputs from current inputs, including DA
            # - incorporating neuromodulated Hebb/eligibility trace into plastic weights
            # - computing updated hebb/eligibility traces
            # Another possibility (modul2):
            # - computing all outputs from current inputs, including DA
            # - computing updated Hebb/eligibility traces
            # - incorporating this modified Hebb into plastic weights through neuromodulation
            # In modul2 we would compute deltaet and update et here too; here we compute them later
            if self.params['addpw'] == 3:
                # Hard clamp
                # From modplast/addpw=3:
                # NOTE(review): `deltahebb` is never defined in this 'modul'
                # branch (it is only computed in 'plastic'/'modplast'), so
                # addpw==3 here raises a NameError at runtime; `hebb1` is
                # also never used afterwards. This line looks like a stale
                # copy-paste from the modplast branch — verify intent.
                hebb1 = torch.clamp(hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=-1.0, max=1.0)
                deltapw = DAout.view(BATCHSIZE,1,1) * et
                pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0)
            elif self.params['addpw'] == 2:
                deltapw = DAout.view(BATCHSIZE,1,1) * et
                # This constrains the pw to stay within [-1, 1] (we could also do that by putting a tanh on top of it, but instead we want pw itself to remain within that range, to avoid large gradients and facilitate movement back to 0)
                # The outer clamp is there for safety. In theory the expression within that clamp is "softly" constrained to stay within [-1, 1], but finite-size effects might throw it off.
                pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw) + torch.clamp(deltapw, max=0.0) * (pw + 1) , min=-.99999, max=.99999)
            elif self.params['addpw'] == 1:
                # Purely additive, tends to make the meta-learning diverge
                deltapw = DAout.view(BATCHSIZE,1,1) * et
                pw1 = pw + deltapw
            elif self.params['addpw'] == 0:
                # We do it the old way, with a decay term.
                # This will FAIL if DAout is allowed to go outside [0,1]
                # Note: this makes the plastic weights decaying!
                # NOTE(review): `pw1` is read here before it is ever assigned
                # in this branch — addpw==0 raises UnboundLocalError. The
                # first factor should almost certainly multiply `pw`, not
                # `pw1` (compare the analogous hebb update in 'modplast').
                pw1 = (1 - DAout.view(BATCHSIZE,1,1)) * pw1 + DAout.view(BATCHSIZE, 1, 1) * et
            pw = pw1
            # Updating the eligibility trace - always a simple decay term.
            deltaet = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS))  # batched outer product...should it be other way round?
            et = (1 - self.etaet) * et + self.etaet * deltaet
            hidden = hactiv

        else:
            raise ValueError("Must select network type")
        return activout, valueout, hidden, hebb, et, pw

    def initialZeroHebb(self):
        # Zero Hebbian trace, shape (BS, HS, HS); also reused for the eligibility trace.
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroPlasticWeights(self):
        # Zero plastic-weight tensor, shape (BS, HS, HS).
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroState(self):
        # Zero recurrent state, shape (BS, HS).
        BATCHSIZE = self.params['bs']
        return Variable(torch.zeros(BATCHSIZE, self.params['hs']), requires_grad=False ).cuda()


def train(paramdict):
    """Meta-train a Network on the batched Grid Maze task (A2C-style).

    paramdict supplies every hyperparameter ('bs', 'hs', 'msize', 'eplen',
    'nbiter', 'lr', 'l2', 'gc', 'gr', 'bent', 'blossv', 'wp', 'rew', 'pe',
    'save_every', 'rngseed', 'type', 'addpw', 'da', ...). Saves gradient
    norms, rewards, model weights and the params dict to suffix-named files
    every 'save_every' episodes. Requires CUDA.
    """
    #params = dict(click.get_current_context().params)
    #TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS
    print("Starting training...")
    params = {}
    #params.update(defaultParams)
    params.update(paramdict)
    print("Passed params: ", params)
    print(platform.uname())
    # Turning the parameters into a nice suffix for filenames.
    # NOTE(review): `pair[0] is not 'nbsteps'` compares string *identity*, not
    # equality — it works only through CPython string interning and emits a
    # SyntaxWarning on Python >= 3.8. Should be `!=` (as animbatch.py does);
    # must stay consistent with the suffix reconstruction in animbatch.py.
    suffix = "btchFixmod_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' and pair[0] is not 'pe' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed'])

    # Initialize random seeds (first two redundant?)
    print("Setting random seeds")
    np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])

    print("Initializing network")
    net = Network(params)
    print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()])
    allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()]
    print ("Size (numel) of all optimized elements:", allsizes)
    print ("Total size (numel) of all optimized elements:", sum(allsizes))

    #total_loss = 0.0
    print("Initializing optimizer")
    optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4, weight_decay=params['l2'])

    BATCHSIZE = params['bs']
    LABSIZE = params['msize']
    lab = np.ones((LABSIZE, LABSIZE))   # 1 = wall/pillar, 0 = free square
    CTR = LABSIZE // 2

    # Grid maze: open the interior, then plant a pillar on every (even, even) cell.
    lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0)
    for row in range(1, LABSIZE - 1):
        for col in range(1, LABSIZE - 1):
            if row % 2 == 0 and col % 2 == 0:
                lab[row, col] = 1
    # Not strictly necessary, but cleaner since we start the agent at the
    # center for each episode; may help loclization in some maze sizes
    # (including 13 and 9, but not 11) by introducing a detectable irregularity
    # in the center:
    lab[CTR,CTR] = 0

    all_losses = []
    all_grad_norms = []
    all_losses_objective = []
    all_total_rewards = []
    all_losses_v = []
    lossbetweensaves = 0
    nowtime = time.time()

    meanrewards = np.zeros((LABSIZE, LABSIZE))
    meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen']))

    pos = 0
    hidden = net.initialZeroState()
    hebb = net.initialZeroHebb()
    pw = net.initialZeroPlasticWeights()

    print("Total number of parameters:", sum([x.numel() for x in net.parameters()]))

    print("Starting episodes!")
    for numiter in range(params['nbiter']):

        PRINTTRACE = 0
        if (numiter+1) % (params['pe']) == 0:
            PRINTTRACE = 1

        # Select the reward location for this episode - not on a wall!
        # And not on the center either! (though not sure how useful that restriction is...)
        # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp'])
        posr = {}; posc = {}
        rposr = {}; rposc = {}
        for nb in range(BATCHSIZE):
            # Note: it doesn't matter if the reward is on the center (see below). All we need is not to put it on a wall or pillar (lab=1)
            myrposr = 0; myrposc = 0
            while lab[myrposr, myrposc] == 1 or (myrposr == CTR and myrposc == CTR):
                myrposr = np.random.randint(1, LABSIZE - 1)
                myrposc = np.random.randint(1, LABSIZE - 1)
            rposr[nb] = myrposr; rposc[nb] = myrposc
            # Agent always starts an episode from the center
            posc[nb] = CTR
            posr[nb] = CTR

        optimizer.zero_grad()
        loss = 0
        lossv = 0
        hidden = net.initialZeroState()
        hebb = net.initialZeroHebb()
        et = net.initialZeroHebb()   # Eligibility Trace is identical to Hebbian Trace in shape
        pw = net.initialZeroPlasticWeights()
        numactionchosen = 0

        reward = np.zeros(BATCHSIZE)
        sumreward = np.zeros(BATCHSIZE)
        rewards = []
        vs = []
        logprobs = []
        dist = 0
        numactionschosen = np.zeros(BATCHSIZE, dtype='int32')

        for numstep in range(params['eplen']):

            # Input layout per batch element:
            # [RF view | bias | time | prev reward | one-hot prev action]
            inputs = np.zeros((BATCHSIZE, TOTALNBINPUTS), dtype='float32')
            labg = lab.copy()
            for nb in range(BATCHSIZE):
                inputs[nb, 0:RFSIZE * RFSIZE] = labg[posr[nb] - RFSIZE//2:posr[nb] + RFSIZE//2 +1, posc[nb] - RFSIZE //2:posc[nb] + RFSIZE//2 +1].flatten() * 1.0
                inputs[nb, RFSIZE * RFSIZE +1] = 1.0                        # Bias neuron
                inputs[nb, RFSIZE * RFSIZE +2] = numstep / params['eplen']  # time signal
                inputs[nb, RFSIZE * RFSIZE +3] = 1.0 * reward[nb]           # previous reward
                inputs[nb, RFSIZE * RFSIZE + ADDINPUT + numactionschosen[nb]] = 1

            inputsC = torch.from_numpy(inputs).cuda()

            ##### Running the network
            y, v, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw)  # y should output raw scores, not probas

            y = F.softmax(y, dim=1)  # Now y is conveted to "proba-like" quantities
            distrib = torch.distributions.Categorical(y)
            actionschosen = distrib.sample()
            logprobs.append(distrib.log_prob(actionschosen))
            numactionschosen = actionschosen.data.cpu().numpy()  # Turn to scalar

            reward = np.zeros(BATCHSIZE, dtype='float32')
            for nb in range(BATCHSIZE):
                myreward = 0
                numactionchosen = numactionschosen[nb]

                tgtposc = posc[nb]
                tgtposr = posr[nb]
                if numactionchosen == 0:    # Up
                    tgtposr -= 1
                elif numactionchosen == 1:  # Down
                    tgtposr += 1
                elif numactionchosen == 2:  # Left
                    tgtposc -= 1
                elif numactionchosen == 3:  # Right
                    tgtposc += 1
                else:
                    raise ValueError("Wrong Action")

                reward[nb] = 0.0  # The reward for this step
                if lab[tgtposr][tgtposc] == 1:
                    # Wall hit: stay put, pay the wall penalty
                    reward[nb] -= params['wp']
                else:
                    posc[nb] = tgtposc
                    posr[nb] = tgtposr

                # Did we hit the reward location ? Increase reward and teleport!
                # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move
                if rposr[nb] == posr[nb] and rposc[nb] == posc[nb]:
                    reward[nb] += params['rew']
                    posr[nb]= np.random.randint(1, LABSIZE - 1)
                    posc[nb] = np.random.randint(1, LABSIZE - 1)
                    while lab[posr[nb], posc[nb]] == 1 or (rposr[nb] == posr[nb] and rposc[nb] == posc[nb]):
                        posr[nb] = np.random.randint(1, LABSIZE - 1)
                        posc[nb] = np.random.randint(1, LABSIZE - 1)

            rewards.append(reward)
            vs.append(v)
            sumreward += reward

            # This is the "entropy bonus" of A2C, except that since our version
            # of PyTorch doesn't have an entropy() function, we implement it as
            # a penalty on the sum of squares instead. The effect is the same:
            # we want to penalize concentration of probabilities, i.e.
            # encourage diversity of actions.
            loss += ( params['bent'] * y.pow(2).sum() / BATCHSIZE )

            if PRINTTRACE:
                print("Step ", numstep, " Inputs (to 1st in batch): ", inputs[0, :TOTALNBINPUTS], " - Outputs(1st in batch): ", y[0].data.cpu().numpy(), " - action chosen(1st in batch): ", numactionschosen[0],
                        " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Reward (this step, 1st in batch): ", reward[0])

        # Episode is done, now let's do the actual computations:
        # backward pass over time accumulating the discounted return R,
        # the value loss (lossv) and the policy-gradient loss.
        R = Variable(torch.zeros(BATCHSIZE).cuda(), requires_grad=False)
        gammaR = params['gr']
        for numstepb in reversed(range(params['eplen'])) :
            R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False)
            # NOTE(review): vs[numstepb][0] indexes batch element 0 only;
            # presumably [:, 0] was intended — TODO confirm (same pattern in
            # animbatch.py).
            ctrR = R - vs[numstepb][0]
            lossv += ctrR.pow(2).sum() / BATCHSIZE
            loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BATCHSIZE   # Need to check if detach() is OK
            #pdb.set_trace()

        # These are different algorithms (essentially variants of REINFORCE) that do not train a value predictor inside the network... Might be interesting to see if value prediction emerges even if it's not explicitly demanded by the meta-training algorithm!
        # (Commented-out REI / REINOB / REITMP / REITMPB variants and the
        # meanrewards / meanrewardstmp baseline updates are retained in
        # version control history; removed here for readability.)

        loss += params['blossv'] * lossv
        loss /= params['eplen']

        if PRINTTRACE:
            if True: #params['algo'] == 'A3C':
                #print("lossv: ", lossv.data.cpu().numpy()[0])
                print("lossv: ", float(lossv))
            print ("Total reward for this episode (all in batch):", sumreward, "Dist:", dist)

        loss.backward()
        # NOTE(review): clip_grad_norm is the deprecated (pre-0.4) spelling;
        # newer PyTorch uses clip_grad_norm_. Left as-is for the pinned
        # version this repo targets.
        all_grad_norms.append(torch.nn.utils.clip_grad_norm(net.parameters(), params['gc']))
        if numiter > 100:  # Burn-in period for meanrewards
            optimizer.step()
        #pdb.set_trace()

        lossnum = float(loss)
        lossbetweensaves += lossnum
        all_losses_objective.append(lossnum)
        all_total_rewards.append(sumreward.mean())

        # Periodic progress report every 'pe' episodes.
        if (numiter+1) % params['pe'] == 0:
            print(numiter, "====")
            print("Mean loss: ", lossbetweensaves / params['pe'])
            lossbetweensaves = 0
            print("Mean reward (across batch and last", params['pe'], "eps.): ", np.sum(all_total_rewards[-params['pe']:])/ params['pe'])
            previoustime = nowtime
            nowtime = time.time()
            print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime)
            if params['type'] == 'plastic' or params['type'] == 'lstmplastic':
                print("ETA: ", net.eta.data.cpu().numpy(), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] )
            elif params['type'] == 'modul':
                print("etaet: ", float(net.etaet), " mean-abs pw: ", torch.mean(torch.abs(pw.data)))
            elif params['type'] == 'rnn':
                print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] )

        # Periodic checkpoint: raw logs, model weights and the params dict.
        if (numiter+1) % params['save_every'] == 0:
            print("Saving files...")
            losslast100 = np.mean(all_losses_objective[-100:])
            print("Average loss over the last 100 episodes:", losslast100)
            print("Saving local files...")
            with open('grad_'+suffix+'.txt', 'w') as thefile:
                for item in all_grad_norms[::10]:
                    thefile.write("%s\n" % item)
            with open('loss_'+suffix+'.txt', 'w') as thefile:
                for item in all_total_rewards[::10]:
                    thefile.write("%s\n" % item)
            torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat')
            with open('params_'+suffix+'.dat', 'wb') as fo:
                pickle.dump(params, fo)
            print("Done!")
            # Uber-only stuff:
            if os.path.isdir('/mnt/share/tmiconi'):
                print("Transferring to NFS storage...")
                for fn in ['params_'+suffix+'.dat', 'loss_'+suffix+'.txt', 'torchmodel_'+suffix+'.dat']:
                    result = os.system(
                        'cp {} {}'.format(fn, '/mnt/share/tmiconi/modulmaze/'+fn))
                print("Done!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--rngseed", type=int, help="random seed", default=0)
    #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0)
    #parser.add_argument("--wp", type=float, help="wall penalty (reward decrement for hitting a wall)", default=0.1)
    # ----- Task / reward shaping -----
    parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0)
    parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.05)
    #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2)
    #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0)
    # ----- Loss coefficients (entropy bonus and value-prediction loss) -----
    parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03)
    #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0)
    parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7)
    #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0)
    #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0)
    #parser.add_argument("--nbarms", type=int, help="number of arms", default=2)
    #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3)
    #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh')
    #parser.add_argument("--algo", help="meta-learning algorithm (A3C or REI)", default='A3C')
    #parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb')
    # ----- Architecture / environment -----
    parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='modul')
    parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9)
    parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh')
    # ----- Optimization hyperparameters -----
    parser.add_argument("--gr",
                        type=float, help="gammaR: discounting factor for rewards", default=.9)
    parser.add_argument("--gc", type=float, help="gradient norm clipping", default=1000.0)
    parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4)
    #parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1)
    #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0)
    #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4)
    #parser.add_argument("--modhalf", type=int, help="which half of the recurrent network receives modulation (1 or 2)", default=1)
    #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4)
    parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1)
    parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=1)
    #parser.add_argument("--clp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0)
    #parser.add_argument("--md", type=int, help="maximum delay for reward reception", default=0)
    parser.add_argument("--eplen", type=int, help="length of episodes", default=100)
    #parser.add_argument("--exptime", type=int, help="exploration (no reward) time (must be < eplen)", default=0)
    parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100)
    parser.add_argument("--bs", type=int, help="batch size", default=1)
    parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=3e-6)
    #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000)
    #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3)
    parser.add_argument("--nbiter",
                        type=int, help="number of learning cycles", default=1000000)
    parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=1000)
    parser.add_argument("--pe", type=int, help="number of cycles between successive printing of information", default=100)
    #parser.add_argument("--", type=int, help="", default=1e-4)
    # Collect every argument that was actually set into a plain dict and launch training.
    args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None }
    #train()
    train(argdict)

================================================ FILE: maze/makefigure.py ================================================

# Plot median reward curves (with inter-quartile bands) for groups of runs.
# Result files sharing a hyperparameter suffix but differing in random seed
# are globbed together into one group; a rank-sum significance test between
# two chosen groups is overlaid as stars.

import numpy as np
import glob
import matplotlib.pyplot as plt
import scipy
from scipy import stats

#colorz = ['r', 'b', 'g', 'c', 'm', 'y', 'orange', 'k']
colorz = ['r', 'm', 'b', 'c', 'y', 'orange']
#colorz = ['g', 'g', 'r', 'r', 'b', 'b']

plt.rc('font', size=14)

# One "group" per hyperparameter setting; the seed_0 file acts as the template
# and all seeds of the same setting are globbed further below.
groupnames = glob.glob('./tmp/loss*tch*gc_4*msize_13*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*tch*gc_*msize_17*seed_0.txt')
#groupnames = glob.glob('./tmp_prev/loss*addpw_3*md_0*msize_13*rew_1.*seed_0.txt')
#groupnames = [x for x in groupnames if not (('pw_0' in x) or ('maz' in x) or
#    ('modul2' in x) or ('rsp_0' in x))] # pw_0 is bad, rsp_0 is a different setting, modul2 has similar results to modul, maz is very slightly different
#groupnames = glob.glob('./tmp/loss*msize_11*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*l2_3e-06*md_4*msize_13*seed_0.txt') # 11 / hs 100, modul vs modplast, with or without delay (and one w/ 4 cpus i.o. 2)
#groupnames = glob.glob('./tmp/loss*l2_3e-06*md_0*msize_13*seed_0.txt') # 11 / hs 100, modul vs modplast, with or without delay (and one w/ 4 cpus i.o. 2)
#groupnames = glob.glob('./tmp/loss*addpw_2*l2_3e-06*md_0*msize_13*seed_0.txt') # 11 / hs 100, modul vs modplast, with or without delay (and one w/ 4 cpus i.o. 2)
#groupnames = glob.glob('./tmp/loss*msize_15*seed_0.txt') # 15, hs 200, modul vs modplast
#groupnames = glob.glob('./tmp/loss*hs_100*msize_13*seed_0.txt') # 13, hs 100, modul vs modplast
#groupnames = glob.glob('./loss*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*msize_13*plastic*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*msize_9*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*modplast*seed_0.txt')
#groupnames = glob.glob('./tmp/loss_*new*eplen_250*rngseed_0.txt')
#groupnames = glob.glob('./loss_*rngseed_0.txt')

# If you can only use 7 runs, smooth the losses within each run to obtain more reliable estimates of performance!
def mavg(x, N):
    """Moving average of x over a window of N points (cumulative-sum trick).

    Output has len(x) - N + 1 points, i.e. mavg changes the number of points.
    """
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

plt.ion()
#plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts
#plt.figure(figsize=(9,7)) # Smaller figure = relative larger fonts
plt.figure(figsize=(7,5)) # Smaller figure = relative larger fonts
#plt.figure()

allmedianls = []
alllosses = []
poscol = 0          # color index, advanced once per plotted group
maxminlen = 0       # largest common-length across groups
minminlen = 999999  # smallest common-length across groups (used for the significance test below)

# Generate labels, and order of curves
namez = []
for numx, x in enumerate(groupnames):
    if 'rnn' in x:
        if '139' in x:
            myname = 'Non-plastic (139 neurons)'
        else:
            myname = 'Non-plastic (100 neurons)'
    elif 'modul' in x:
        myname = "Retroactive modulation (100 neurons)"
    elif 'modplast' in x:
        myname = "Simple modulation (100 neurons)"
    elif 'plastic' in x:
        myname = "Non-modulated plasticity (101 neurons)"
    #if 'pw_3' in x:
    #    myname += " (Hard Clip)"
    #else:
    #    myname += " (Soft Clip)"
    namez.append(myname)
# Sort the curves by reversed alphabetical label order so legend and colors are stable.
order = np.argsort(namez)[::-1]
namez = [namez[c] for c in order]
groupnames = [groupnames[c] for c in order]

for numgroup, groupname in enumerate(groupnames):
    if "batch" in groupname:
        continue
    #if "lstm" not in groupname:
    #    continue
    # Replace the trailing "0.txt" by "*" to glob every seed of this setting.
    g = groupname[:-6]+"*"
    print("====", groupname)
    fnames = glob.glob(g)
    fulllosses=[]
    losses=[]
    lgts=[]
    for fn in fnames:
        if "COPY" in fn:
            continue
        if False:  # Disabled seed-exclusion switch, kept for experimentation.
            #if "seed_3" in fn:
            #    continue
            #if "seed_7" in fn:
            #    continue
            if "seed_3" in fn:
                continue
            #if "seed_9" in fn:
            #    continue
            #if "seed_10" in fn:
            #    continue
            #if "seed_11" in fn:
            #    continue
            #if "seed_12" in fn:
            #    continue
            #if "seed_13" in fn:
            #    continue
            #if "seed_14" in fn:
            #    continue
            #if "seed_15" in fn:
            #    continue
        z = np.loadtxt(fn)
        #z = mavg(z, 10) # For each run, we average the losses over K successive episodes
        z = z[::10] # Decimation - speed things up!
        z = z[:1000] # Only plot the first 100K episodes (taking into account decimation above and only every 10th episode is stored in the first place)
        print(fn, len(z))
        if len(z) < 10:
            print(fn, len(z))
            continue
        #z = z[:90]
        lgts.append(len(z))
        fulllosses.append(z)
    # Truncate every run of this group to the group's shortest run so they stack.
    minlen = min(lgts)
    if minlen > maxminlen:
        maxminlen = minlen
    if minlen < minminlen:
        minminlen = minlen
    print("Minlen:", minlen)
    #if minlen < 1000:
    #    continue
    for z in fulllosses:
        losses.append(z[:minlen])
    losses = np.array(losses)
    alllosses.append(losses)
    meanl = np.mean(losses, axis=0)
    stdl = np.std(losses, axis=0)
    #cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality
    #cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f.
    medianl = np.median(losses, axis=0)
    allmedianls.append(medianl)
    q1l = np.percentile(losses, 25, axis=0) # 1st quartile
    q3l = np.percentile(losses, 75, axis=0) # 3rd quartile
    highl = np.max(losses, axis=0)
    lowl = np.min(losses, axis=0)
    #highl = meanl+stdl
    #lowl = meanl-stdl
    xx = range(len(meanl))
    # xticks and labels
    #xt = range(0, maxminlen, 1000)
    xt = range(0, 1001, 200)
    #xt = range(0, len(meanl), 100)
    #xt = range(0, len(meanl), 1000)
    #xt = range(0, 10001, 2000)
    xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files
    #plt.plot(mavg(meanl, 100), label=g) #, color='blue')
    #plt.fill_between(xx, lowl, highl, alpha=.2)
    #plt.fill_between(xx, q1l, q3l, alpha=.1)
    #plt.plot(meanl) #, color='blue')
    ####plt.plot(mavg(medianl, 100), label=g) #, color='blue') # mavg changes the number of points !
    #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue')
    #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue')
    #plt.fill_between(xx, q1l, q3l, alpha=.2)
    #plt.plot(medianl, label=g) #, color='blue')
    AVGSIZE = 20 # size of the moving average window
    xlen = len(mavg(q1l, AVGSIZE))
    #mylabel = g[g.find('type'):]
    mylabel = namez[numgroup]# g
    print(numgroup, mylabel)
    #if numgroup // 8 == 0:
    #    zestyle = '-'
    #elif numgroup // 8 == 1:
    #    zestyle = '--'
    #elif numgroup // 8 == 2:
    #    zestyle = ':'
    # Alternate solid / dashed lines between successive groups.
    if numgroup % 2 == 0:
        zestyle = '-'
    else:
        zestyle = '--'
    plt.plot(mavg(medianl, AVGSIZE), label=mylabel, color=colorz[poscol % len(colorz)], ls=zestyle, lw=2) # mavg changes the number of points !
    plt.fill_between( range(xlen), mavg(q1l, AVGSIZE), mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    #xlen = len(mavg(meanl, AVGSIZE))
    #plt.plot(mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points !
    #plt.fill_between( range(xlen), mavg(meanl - cil, AVGSIZE), mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    poscol += 1
    #plt.fill_between( range(xlen), mavg(lowl, 100), mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)])
    #plt.plot(mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)])
    #for curve in losses[1:]:
    #    plt.plot(mavg(curve, 1000), color=colorz[numgroup % len(colorz)])

ps = []
# Adapt for varying lengths across groups
#for n in range(0, alllosses[0].shape[1], 3):
#for n in range(0, minminlen):
#    ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue)
#ps = np.array(ps)
# Wilcoxon rank-sum test between the first and fifth groups at each time point.
# NOTE(review): a[4] assumes at least five groups matched the glob - confirm
# before running on a different selection.
a = alllosses
signifs = []
for n in range(minminlen):
    signifs.append((scipy.stats.ranksums(a[0][:,n], a[4][:,n])).pvalue)
# Keep only the time indices where p < .05, then mark them with stars at y=20.
signifs = [x[0] for x in zip(range(minlen), signifs) if x[1] < .05]
plt.plot( np.array(signifs), [20]*len(signifs), '*')
####plt.legend(loc=(.430,.15), fontsize=13)
plt.legend(loc='upper left', fontsize=13)
#plt.legend(loc='best', fontsize=13)
#plt.xlabel('Loss (sum square diff. b/w final output and target)')
plt.xlabel('Number of Episodes')
plt.ylabel('Reward')
plt.xticks(xt, xtl)
#plt.tight_layout()

================================================ FILE: maze/makemaze.py ================================================

# Not used for the current version.
import numpy as np

def genmaze(size, nblines):
    """Generate a size x size maze array (1 = wall, 0 = free).

    Starts from an empty interior surrounded by a border wall, then adds
    `nblines` random interior wall segments, each pierced by a 2-cell opening.
    """
    nbiter = 0
    N = size
    m = np.zeros((N,N))
    # Border walls.
    m[0,:] = 1
    m[-1,:] = 1
    m[:,0] = 1
    m[:, -1]= 1
    MAXLINES = nblines
    mynblines = 0
    while True:
        nbiter += 1
        if nbiter == 10000:
            #print("Inf. loop in maze gen, resetting map & retrying")
            # (continuation of the diagnostic comment above: "Inf. loop in maze
            # gen, resetting map & retrying") # If that happens too often parameters are probably not good
            #print("IL") # If that happens too often parameters are probably not good
            # Too many failed placements: wipe the maze (keeping the border) and start over.
            m.fill(0)
            m[0,:] = 1; m[-1,:] = 1; m[:,0] = 1; m[:, -1]= 1;
            nbiter = 0
            mynblines = 0
        # Pick a random interior seed cell; skip if it is already a wall.
        rcol = 1 + np.random.randint(N-1)
        rrow = 1 + np.random.randint(N-1)
        if m[rrow, rcol] == 1:
            continue
        ori = np.random.randint(2)  # 0 = horizontal segment, 1 = vertical segment
        if ori == 0: # horizontal
            # Extend left and right from the seed cell until an existing wall is met.
            start = rcol
            while m[rrow, start] == 0:
                start -= 1
            end = rcol
            while m[rrow, end] == 0:
                end += 1
            end -= 1
            start += 1
            if end-start < 4:
                continue
            # Reject the segment if any wall lies within two rows of it.
            if np.sum(m[rrow-1, start:end+1]) > 0 or np.sum(m[rrow+1, start:end+1]) > 0:
                continue
            if np.sum(m[rrow-2, start:end+1]) > 0 or np.sum(m[rrow+2, start:end+1]) > 0:
                continue
            m[rrow, start:end+1] = 1
            # Pierce a 2-cell opening at a random position inside the segment.
            opening = np.random.randint(start+1, end-1)
            m[rrow, opening] = 0
            m[rrow, opening+1] = 0
            mynblines += 1
        elif ori == 1: # vertical
            # Same procedure, along a column.
            start = rrow
            while m[start, rcol] == 0:
                start -= 1
            end = rrow
            while m[end, rcol] == 0:
                end += 1
            end -= 1
            start += 1
            if end-start < 5:
                continue
            if np.sum(m[start:end+1, rcol-1]) > 0 or np.sum(m[start:end+1, rcol+1]) > 0:
                continue
            if np.sum(m[start:end+1, rcol-2]) > 0 or np.sum(m[start:end+1, rcol+2]) > 0:
                continue
            m[start:end+1, rcol] = 1
            opening = np.random.randint(start+1, end-1)
            m[opening, rcol] = 0
            m[opening+1, rcol] = 0
            mynblines += 1
        if mynblines >= MAXLINES:
            break
    return m

if __name__ == '__main__':
    #M = genmaze(size=50, nblines=8)
    M = genmaze(size=15, nblines=4)
    #M = genmaze(size=19, nblines=4)
    print(M)

================================================ FILE: maze/maze.py ================================================

# Differentiable plasticity: maze exploration task.
#
# Copyright (c) 2018 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE: Do NOT use the 'lstmplastic' in this code. Instead, look at the
# awd-lstm-lm directory in the Backpropamine repo
# (https://github.com/uber-research/backpropamine) for properly implemented
# plastic LSTMs.

import argparse
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random  # NOTE(review): shadows the `from numpy import random` above
import sys
import pickle
import time
import os
import platform

# Uber-only: import OpusHdfsCopy
from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs

import numpy as np  # (duplicate import, kept as-is)
#import matplotlib.pyplot as plt
import glob

np.set_printoptions(precision=4)

ETA = .02 # Not used
ADDINPUT = 4 # 1 input for the previous reward, 1 input for numstep, 1 for whether currently on reward square, 1 "Bias" input
NBACTIONS = 4 # U, D, L, R
RFSIZE = 3 # Receptive field size
TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS

##ttype = torch.FloatTensor; # For CPU
ttype = torch.cuda.FloatTensor; # For GPU

class Network(nn.Module):
    """Recurrent policy/value network for the maze task.

    Depending on params['type'], builds one of several variants: a standard
    LSTM ('lstm'), a hand-written LSTM ('lstmmanual'), an experimental plastic
    LSTM ('lstmplastic' - see NOTE at top of file), a plain RNN ('rnn'), or
    RNNs with Hebbian plastic recurrent weights ('plastic', 'homo').
    """

    def __init__(self, params):
        """Build the layers/parameters for the variant selected by params['type'].

        Expected keys: 'type', 'rule' ('hebb' or 'oja'), 'activ' ('tanh' or
        'selu'), 'hiddensize'. All modules are moved to GPU with .cuda().
        """
        super(Network, self).__init__()
        self.rule = params['rule']
        self.type = params['type']
        # NOTE(review): softmax is used without an explicit dim argument below
        # (old-PyTorch semantics) - confirm when porting to a newer version.
        self.softmax= torch.nn.functional.softmax
        if params['activ'] == 'tanh':
            self.activ = F.tanh
        elif params['activ'] == 'selu':
            self.activ = F.selu
        else:
            raise ValueError('Must choose an activ function')
        if params['type'] == 'lstm':
            self.lstm = torch.nn.LSTM(TOTALNBINPUTS, params['hiddensize']).cuda()
        elif params['type'] == 'rnn':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
        elif params['type'] == 'homo':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Homogenous plasticity: everyone has the same alpha
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
        elif params['type'] == 'plastic':
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
        elif params['type'] == 'lstmplastic':
            # LSTM with plastic connections. HIGHLY EXPERIMENTAL, NOT DEBUGGED - see awd-lstm-lm directory at https://github.com/uber-research/backpropamine instead.
            self.h2f = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.h2i = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.h2opt = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            # Plasticity only in the recurrent connections, h to c.
            #self.h2c = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda() # This is replaced by the plastic connection matrices below
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hiddensize'], params['hiddensize'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
            self.x2f = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2opt = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2i = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2c = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
        elif params['type'] == 'lstmmanual':
            # An LSTM implemented "by hand", to ensure maximum similarity with the plastic LSTM
            self.h2f = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.h2i = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.h2opt = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.h2c = torch.nn.Linear(params['hiddensize'], params['hiddensize']).cuda()
            self.x2f = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2opt = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2i = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            self.x2c = torch.nn.Linear(TOTALNBINPUTS, params['hiddensize']).cuda()
            ##fgt = F.sigmoid(self.x2f(input) + self.h2f(hidden[0]))
            ##ipt = F.sigmoid(self.x2i(input) + self.h2i(hidden[0]))
            ##opt = F.sigmoid(self.x2o(input) + self.h2o(hidden[0]))
            ##cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(input) + self.h2c(hidden[0])))
            ##h = torch.mul(opt, cell)
            ##hidden = (h, cell)
        else:
            raise ValueError("Which network type?")
        self.h2o = torch.nn.Linear(params['hiddensize'], NBACTIONS).cuda() # From hidden to action output
        self.h2v = torch.nn.Linear(params['hiddensize'], 1).cuda() # From hidden to value prediction (for A3C)
        self.params = params
        # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions
        # Each *column* of w targets a single output neuron

    def forward(self, input, hidden, hebb):
        """One recurrent step.

        Args:
            input: row-vector input for this step.
            hidden: previous hidden state (a tensor, or an (h, c) tuple for the
                LSTM variants).
            hebb: Hebbian trace matrix (unused by the non-plastic variants but
                always passed through).

        Returns:
            (activout, valueout, hidden, hebb): action probabilities, value
            prediction, new hidden state, and updated Hebbian trace.
        """
        if self.type == 'lstm':
            hactiv, hidden = self.lstm(input.view(1, 1, -1), hidden) # hactiv is just the h. hidden is the h and the cell state, in a tuple
            hactiv = hactiv.view(1, -1)
        elif self.type == 'rnn':
            hactiv = self.activ(self.i2h(input) + hidden.mm(self.w))
            hidden = hactiv
        # Draft for a "manual" lstm:
        elif self.type== 'lstmmanual':
            # hidden[0] is the previous h state. hidden[1] is the previous c state
            fgt = F.sigmoid(self.x2f(input) + self.h2f(hidden[0]))
            ipt = F.sigmoid(self.x2i(input) + self.h2i(hidden[0]))
            opt = F.sigmoid(self.x2opt(input) + self.h2opt(hidden[0]))
            cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(input) + self.h2c(hidden[0])))
            hactiv = torch.mul(opt, F.tanh(cell))
            #pdb.set_trace()
            hidden = (hactiv, cell)
            if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
                raise ValueError("Nan detected !")
        elif self.type== 'lstmplastic':
            fgt = F.sigmoid(self.x2f(input) + self.h2f(hidden[0]))
            ipt = F.sigmoid(self.x2i(input) + self.h2i(hidden[0]))
            opt = F.sigmoid(self.x2opt(input) + self.h2opt(hidden[0]))
            #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(input) + self.h2c(hidden[0])))
            # Need to think what the inputs and outputs should be for the
            # plasticity. It might be worth introducing an additional stage
            # consisting of whatever is multiplied by ift and then added to the
            # cell state, rather than the full cell state.... But we can
            # experiment both!
            #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(input) + hidden[0].mm(self.w + torch.mul(self.alpha, hebb)))) # self.h2c(hidden[0])))
            inputtocell = F.tanh(self.x2c(input) + hidden[0].mm(self.w + torch.mul(self.alpha, hebb)))
            cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputtocell) # self.h2c(hidden[0])))
            if self.rule == 'hebb':
                #hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden[0].unsqueeze(2), cell.unsqueeze(1))[0]
                hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden[0].unsqueeze(2), inputtocell.unsqueeze(1))[0]
            elif self.rule == 'oja':
                # NOTE: NOT SURE ABOUT THE OJA VERSION !!
                hebb = hebb + self.eta * torch.mul((hidden[0][0].unsqueeze(1) - torch.mul(hebb , inputtocell[0].unsqueeze(0))) , inputtocell[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting!
                #hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting!
            hactiv = torch.mul(opt, F.tanh(cell))
            #pdb.set_trace()
            hidden = (hactiv, cell)
            if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) :
                raise ValueError("Nan detected !")
        elif self.type == 'plastic':
            # Effective recurrent weight = fixed w plus per-connection alpha-scaled Hebbian trace.
            hactiv = self.activ(self.i2h(input) + hidden.mm(self.w + torch.mul(self.alpha, hebb)))
            if self.rule == 'hebb':
                hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0]
            elif self.rule == 'oja':
                hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting!
            else:
                raise ValueError("Must specify learning rule ('hebb' or 'oja')")
            hidden = hactiv
        elif self.type == 'homo':
            # Same as 'plastic' but with a single scalar alpha shared by all connections.
            hactiv = self.activ(self.i2h(input) + hidden.mm(self.w + self.alpha * hebb))
            if self.rule == 'hebb':
                hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0]
            elif self.rule == 'oja':
                hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting!
            else:
                raise ValueError("Must specify learning rule ('hebb' or 'oja')")
            hidden = hactiv
        activout = self.softmax(self.h2o(hactiv)) # Action selection
        valueout = self.h2v(hactiv) # Value prediction (for A3C)
        return activout, valueout, hidden, hebb

    def initialZeroHebb(self):
        """Return a zeroed hiddensize x hiddensize Hebbian trace (on GPU)."""
        return Variable(torch.zeros(self.params['hiddensize'], self.params['hiddensize']) , requires_grad=False).cuda()

    def initialZeroState(self):
        """Return the zeroed initial hidden state appropriate for this network type."""
        if self.params['type'] == 'lstm':
            return (Variable(torch.zeros(1, 1, self.params['hiddensize']), requires_grad=False).cuda() , Variable(torch.zeros(1, 1, self.params['hiddensize']), requires_grad=False ).cuda() )
        elif self.params['type'] == 'lstmmanual' or self.params['type'] == 'lstmplastic':
            return (Variable(torch.zeros(1, self.params['hiddensize']), requires_grad=False).cuda() , Variable(torch.zeros(1, self.params['hiddensize']), requires_grad=False ).cuda() )
        elif self.params['type'] == 'rnn' or self.params['type'] == 'plastic' or self.params['type'] == 'homo':
            return Variable(torch.zeros(1, self.params['hiddensize']), requires_grad=False ).cuda()
        else:
            raise ValueError("Which type?")

def train(paramdict):
    """Meta-train a Network on the maze exploration task.

    (The definition continues beyond this extracted chunk.)
    """
    #params = dict(click.get_current_context().params)
    print("Starting training...")
    params = {}
    #params.update(defaultParams)
    params.update(paramdict)
    print("Passed params: ", params)
    print(platform.uname())
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) *
params['nbclasses']) + params['prestimetest'] # Total number of steps per episode suffix = "maze_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames # Initialize random seeds (first two redundant?) print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) print("Initializing network") net = Network(params) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) print("Initializing optimizer") optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) LABSIZE = params['labsize'] lab = np.ones((LABSIZE, LABSIZE)) CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) for row in range(1, LABSIZE - 1): for col in range(1, LABSIZE - 1): if row % 2 == 0 and col % 2 == 0: lab[row, col] = 1 lab[CTR,CTR] = 0 # Not really necessary, but nicer to not start on a wall, and perhaps helps localization by introducing a detectable irregularity in the center? 
all_losses = [] all_losses_objective = [] all_losses_eval = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() print("Starting episodes...") sys.stdout.flush() pos = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() # Starting episodes! for numiter in range(params['nbiter']): PRINTTRACE = 0 if (numiter+1) % (1 + params['print_every']) == 0: PRINTTRACE = 1 # Note: it doesn't matter if the reward is on the center (reward is only computed after an action is taken). All we need is not to put it on a wall or pillar (lab=1) rposr = 0; rposc = 0 if params['rp'] == 0: # If we want to constrain the reward to fall on the periphery of the maze while lab[rposr, rposc] == 1: rposr = np.random.randint(1, LABSIZE - 1) rposc = np.random.randint(1, LABSIZE - 1) elif params['rp'] == 1: while lab[rposr, rposc] == 1 or (rposr != 1 and rposr != LABSIZE -2 and rposc != 1 and rposc != LABSIZE-2): rposr = np.random.randint(1, LABSIZE - 1) rposc = np.random.randint(1, LABSIZE - 1) #print("Reward pos:", rposr, rposc) # Agent always starts an episode from the center posc = CTR posr = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() reward = 0.0 rewards = [] vs = [] logprobs = [] sumreward = 0.0 dist = 0 for numstep in range(params['eplen']): inputsN = np.zeros((1, TOTALNBINPUTS), dtype='float32') inputsN[0, 0:RFSIZE * RFSIZE] = lab[posr - RFSIZE//2:posr + RFSIZE//2 +1, posc - RFSIZE //2:posc + RFSIZE//2 +1].flatten() inputs = torch.from_numpy(inputsN).cuda() # Previous chosen action #inputs[0][numactionchosen] = 1 inputs[0][-1] = 1 # Bias neuron inputs[0][-2] = numstep inputs[0][-3] = reward # Running the network y, v, hidden, hebb = net(Variable(inputs, requires_grad=False), hidden, hebb) # y should output probabilities; v is the value prediction distrib = torch.distributions.Categorical(y) actionchosen = distrib.sample() # sample() returns a Pytorch tensor of size 1; this is needed for the backprop below 
numactionchosen = actionchosen.data[0] # Turn to scalar # Target position, based on the selected action tgtposc = posc tgtposr = posr if numactionchosen == 0: # Up tgtposr -= 1 elif numactionchosen == 1: # Down tgtposr += 1 elif numactionchosen == 2: # Left tgtposc -= 1 elif numactionchosen == 3: # Right tgtposc += 1 else: raise ValueError("Wrong Action") reward = 0.0 if lab[tgtposr][tgtposc] == 1: reward = -.1 else: dist += 1 posc = tgtposc posr = tgtposr # Did we hit the reward location ? Increase reward and teleport! # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move if rposr == posr and rposc == posc: reward += 10 if params['randstart'] == 1: posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) while lab[posr, posc] == 1: posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) else: posr = CTR posc = CTR # Store the obtained reward, value prediction, and log-probabilities, for this time step rewards.append(reward) sumreward += reward vs.append(v) logprobs.append(distrib.log_prob(actionchosen)) # A3C/A2C has an entropy reward on the output probabilities, to # encourage exploration. Our version of PyTorch does not have an # entropy() function for Distribution, so we use a penalty on the # sum of squares instead, which has the same basic property # (discourages concentration). It really does help! loss += params['bentropy'] * y.pow(2).sum() #if PRINTTRACE: # print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward) # Do the A2C ! (essentially copied from V. 
Mnih, https://arxiv.org/abs/1602.01783, Algorithm S3) R = 0 gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : R = gammaR * R + rewards[numstepb] lossv += (vs[numstepb][0] - R).pow(2) loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) if PRINTTRACE: print("lossv: ", lossv.data.cpu().numpy()[0]) print ("Total reward for this episode:", sumreward, "Dist:", dist) # Do we want to squash rewards for stabilization? if params['squash'] == 1: if sumreward < 0: sumreward = -np.sqrt(-sumreward) else: sumreward = np.sqrt(sumreward) elif params['squash'] == 0: pass else: raise ValueError("Incorrect value for squash parameter") # Mixing the reward loss and the value-prediction loss loss += params['blossv'] * lossv loss /= params['eplen'] loss.backward() #scheduler.step() optimizer.step() #torch.cuda.empty_cache() lossnum = loss.data[0] lossbetweensaves += lossnum if (numiter + 1) % 10 == 0: all_losses_objective.append(lossnum) all_losses_eval.append(sumreward) all_losses_v.append(lossv.data[0]) # Algorithm done. Now print statistics and save files. 
if (numiter+1) % params['print_every'] == 0: print(numiter, "====") print("Mean loss: ", lossbetweensaves / params['print_every']) lossbetweensaves = 0 previoustime = nowtime nowtime = time.time() print("Time spent on last", params['print_every'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", net.eta.data.cpu().numpy(), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numiter+1) % params['save_every'] == 0: print("Saving files...") losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) print("Saving local files...") with open('params_'+suffix+'.dat', 'wb') as fo: pickle.dump(params, fo) with open('lossv_'+suffix+'.txt', 'w') as thefile: for item in all_losses_v: thefile.write("%s\n" % item) with open('loss_'+suffix+'.txt', 'w') as thefile: for item in all_losses_eval: thefile.write("%s\n" % item) torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat') # Uber-only print("Saving HDFS files...") if checkHdfs(): print("Transfering to HDFS...") transferFileToHdfsDir('loss_'+suffix+'.txt', '/ailabs/tmiconi/gridlab/') transferFileToHdfsDir('torchmodel_'+suffix+'.dat', '/ailabs/tmiconi/gridlab/') transferFileToHdfsDir('params_'+suffix+'.dat', '/ailabs/tmiconi/gridlab/') #print("Saved!") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) parser.add_argument("--bentropy", type=float, help="coefficient for the A2C 'entropy' reward (really Simpson index concentration measure)", default=0.1) parser.add_argument("--blossv", type=float, help="coefficient for the A2C value prediction loss", default=.03) 
parser.add_argument("--labsize", type=int, help="size of the labyrinth; must be odd", default=9) parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=1) parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0) parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0) #parser.add_argument("--nbarms", type=int, help="number of arms", default=2) #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3) parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh') parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='oja') parser.add_argument("--type", help="network type ('rnn' or 'plastic')", default='rnn') parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9) parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4) parser.add_argument("--eplen", type=int, help="length of episodes", default=250) parser.add_argument("--hiddensize", type=int, help="size of the recurrent (hidden) layer", default=200) #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000) #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=200) parser.add_argument("--print_every", type=int, help="number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : 
argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: maze/opus.docker ================================================ #tmiconi_rl #latest #. #FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 FROM localhost:5000/opus-deep-learning:master-test-2018_1_3_0_38_14 RUN mkdir /home/work COPY ./*.py /home/work/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: maze/opus.docker.old ================================================ #tmiconi_rl #latest #. #FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 FROM opus-deep-learning-py3:master-prod-2019_2_5_4_54_39 #FROM opus-deep-learning:master--2018_9_20_18_2_31 RUN mkdir /home/work COPY ./*.py /home/work/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: maze/plotfigure.py ================================================ import numpy as np import glob import matplotlib.pyplot as plt import scipy from scipy import stats colorz = ['r', 'b', 'g', 'c', 'm', 'y', 'orange', 'k'] groupnames = glob.glob('./tmpWorked/loss_*absize_11_*rngseed_1.txt') #groupnames = glob.glob('./tmp8/loss_*eplen_251*densize_200*absize_11_*ndstart_1*rngseed_1.txt') #groupnames = glob.glob('./tmp8/loss_*eplen_251*densize_200*absize_11_*ndstart_1*rngseed_1.txt') #groupnames = glob.glob('./tmp/loss_*new*eplen_251*rngseed_0.txt') #groupnames = glob.glob('./tmp/loss_*new*eplen_250*rngseed_0.txt') plt.rc('font', size=14) # If you can only use 7 runs, smooth the losses within each run to obtain more reliable estimates of performance! 
def mavg(x, N): cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / N plt.ion() #plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts plt.figure() allmedianls = [] alllosses = [] poscol = 0 minminlen = 999999 for numgroup, groupname in enumerate(groupnames): if "lstm" in groupname: continue g = groupname[:-6]+"*" print("====", groupname) fnames = glob.glob(g) fulllosses=[] losses=[] lgts=[] for fn in fnames: if False: if "seed_7" in fn: continue if "seed_8" in fn: continue z = np.loadtxt(fn) z = mavg(z, 20) # For each run, we average the losses over K successive episodes - otherwise figure is unreadable due to noise! z = z[::10] # Decimation - speed things up! z = mavg(z, 10) #z = z[:5001] if len(z) < 9000: print(fn) continue #z = z[:90] lgts.append(len(z)) fulllosses.append(z) minlen = min(lgts) if minlen < minminlen: minminlen = minlen print(minlen) #if minlen < 1000: # continue for z in fulllosses: losses.append(z[:minlen]) losses = np.array(losses) alllosses.append(losses) meanl = np.mean(losses, axis=0) stdl = np.std(losses, axis=0) #cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f. (?) 
medianl = np.median(losses, axis=0) allmedianls.append(medianl) q1l = np.percentile(losses, 25, axis=0) q3l = np.percentile(losses, 75, axis=0) highl = np.max(losses, axis=0) lowl = np.min(losses, axis=0) #highl = meanl+stdl #lowl = meanl-stdl xx = range(len(meanl)) # xticks and labels #xt = range(0, len(meanl), 2000) xt = range(0, 10001, 2000) xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files if "plastic" in groupname: lbl = "Plastic" if "homo" in groupname: lbl = "Homogenous Plastic" elif "rnn" in groupname: lbl = "Non-plastic" #plt.plot(mavg(meanl, 100), label=g) #, color='blue') #plt.fill_between(xx, lowl, highl, alpha=.2) #plt.fill_between(xx, q1l, q3l, alpha=.1) #plt.plot(meanl) #, color='blue') ####plt.plot(mavg(medianl, 100), label=g) #, color='blue') # mavg changes the number of points ! #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue') #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue') #plt.fill_between(xx, q1l, q3l, alpha=.2) #plt.plot(medianl, label=g) #, color='blue') AVGSIZE = 1 xlen = len(mavg(q1l, AVGSIZE)) plt.plot(mavg(medianl, AVGSIZE), color=colorz[poscol % len(colorz)], label=lbl) # mavg changes the number of points ! plt.fill_between( range(xlen), mavg(q1l, AVGSIZE), mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)]) #xlen = len(mavg(meanl, AVGSIZE)) #plt.plot(mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points ! 
#plt.fill_between( range(xlen), mavg(meanl - cil, AVGSIZE), mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)]) poscol += 1 #plt.fill_between( range(xlen), mavg(lowl, 100), mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)]) #plt.plot(mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)]) #for curve in losses[1:]: # plt.plot(mavg(curve, 1000), color=colorz[numgroup % len(colorz)]) ps = [] # Adapt for varying lengths across groups #for n in range(0, alllosses[0].shape[1], 3): for n in range(0, minminlen): ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue) ps = np.array(ps) print(np.mean(ps[-500:] < .05), np.mean(ps[-500:] < .01)) plt.legend(loc='best', fontsize=14) #plt.xlabel('Loss (sum square diff. b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Reward') plt.xticks(xt, xtl) #plt.tight_layout() ================================================ FILE: maze/plotresults.py ================================================ # Code for plotting results # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np import glob import matplotlib.pyplot as plt import scipy from scipy import stats colorz = ['r', 'b', 'g', 'c', 'm', 'y', 'orange', 'k'] groupnames = glob.glob('./tmp/loss_*new*eplen_251*rngseed_0.txt') #groupnames = glob.glob('./tmp/loss_*new*eplen_250*rngseed_0.txt') #groupnames = glob.glob('./tmp/loss_*new*.9_*rngseed_0.txt') # If you can only use 7 runs, smooth the losses within each run to obtain more reliable estimates of performance! def mavg(x, N): cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / N plt.ion() #plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts plt.figure() allmedianls = [] alllosses = [] poscol = 0 minminlen = 999999 for numgroup, groupname in enumerate(groupnames): #if "lstm" not in groupname: # continue g = groupname[:-6]+"*" print("====", groupname) fnames = glob.glob(g) fulllosses=[] losses=[] lgts=[] for fn in fnames: if "COPY" in fn: continue if False: #if "seed_3" in fn: # continue #if "seed_7" in fn: # continue if "seed_8" in fn: continue if "seed_9" in fn: continue if "seed_10" in fn: continue if "seed_11" in fn: continue if "seed_12" in fn: continue if "seed_13" in fn: continue if "seed_14" in fn: continue if "seed_15" in fn: continue z = np.loadtxt(fn) z = mavg(z, 10) # For each run, we average the losses over K successive episodes z = z[::10] # Decimation - speed things up! if len(z) < 500: print(fn) continue #z = z[:90] lgts.append(len(z)) fulllosses.append(z) minlen = min(lgts) if minlen < minminlen: minminlen = minlen print(minlen) #if minlen < 1000: # continue for z in fulllosses: losses.append(z[:minlen]) losses = np.array(losses) alllosses.append(losses) meanl = np.mean(losses, axis=0) stdl = np.std(losses, axis=0) cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality #cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f. 
medianl = np.median(losses, axis=0) allmedianls.append(medianl) q1l = np.percentile(losses, 25, axis=0) q3l = np.percentile(losses, 75, axis=0) highl = np.max(losses, axis=0) lowl = np.min(losses, axis=0) #highl = meanl+stdl #lowl = meanl-stdl xx = range(len(meanl)) # xticks and labels xt = range(0, len(meanl), 500) xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files #plt.plot(mavg(meanl, 100), label=g) #, color='blue') #plt.fill_between(xx, lowl, highl, alpha=.2) #plt.fill_between(xx, q1l, q3l, alpha=.1) #plt.plot(meanl) #, color='blue') ####plt.plot(mavg(medianl, 100), label=g) #, color='blue') # mavg changes the number of points ! #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue') #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue') #plt.fill_between(xx, q1l, q3l, alpha=.2) #plt.plot(medianl, label=g) #, color='blue') AVGSIZE = 20 xlen = len(mavg(q1l, AVGSIZE)) plt.plot(mavg(medianl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points ! plt.fill_between( range(xlen), mavg(q1l, AVGSIZE), mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)]) #xlen = len(mavg(meanl, AVGSIZE)) #plt.plot(mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points ! 
#plt.fill_between( range(xlen), mavg(meanl - cil, AVGSIZE), mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)]) poscol += 1 #plt.fill_between( range(xlen), mavg(lowl, 100), mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)]) #plt.plot(mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)]) #for curve in losses[1:]: # plt.plot(mavg(curve, 1000), color=colorz[numgroup % len(colorz)]) ps = [] # Adapt for varying lengths across groups #for n in range(0, alllosses[0].shape[1], 3): #for n in range(0, minminlen): # ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue) #ps = np.array(ps) plt.legend(loc='best', fontsize=6) #plt.xlabel('Loss (sum square diff. b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Loss') plt.xticks(xt, xtl) #plt.tight_layout() ================================================ FILE: maze/request.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_2_9_17_17_4", "name":"Exp10_new_B_gr9_hs_100_labsize_11_eplen251_lstmplastic", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 gridlab.py --nbiter 1000000 --rule oja --squash 0 --hiddensize 100 --type lstmplastic --lr 1e-4 --eplen 251 --print_every 100 --save_every 1000 --bentropy 0.1 --blossv .03 --randstart 1 --gr .9 --rp 0 --labsize 11 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi_peloton", "instances":15, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, 
"constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/request_devbox.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "cpus":2.0, "ramMB":6000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":1, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/request_modplast.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "name":"Maze_Modplast_hs100_eplen200_addpw3_bv0.1_bent0.03_rew10_bs30_gc4_lr1e-4_l20", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 batch.py --eplen 200 --hs 100 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type modplast --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], 
"cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/request_modul.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "name":"Maze_Modul_hs100_eplen200_addpw3_bv0.1_bent0.03_rew10_bs30_gc4_lr1e-4_l20", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 batch.py --eplen 200 --hs 100 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type modul --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/request_plastic.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "name":"Maze_Plastic_hs101_eplen200_addpw3_bv0.1_bent0.03_rew10_bs30_gc4_lr1e-4_l20", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 
export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 batch.py --eplen 200 --hs 101 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type plastic --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/request_rnn.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "name":"Maze_RNN_hs139_eplen200_addpw3_bv0.1_bent0.03_rew10_bs30_gc4_lr1e-4_l20", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 batch.py --eplen 200 --hs 139 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type rnn --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], 
"emailOnSucceed":[] } ================================================ FILE: maze/request_rnn100neurons.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_3_11_4_2", "name":"Maze_RNN_hs00_eplen200_addpw3_bv0.1_bent0.03_rew10_bs30_gc4_lr1e-4_l20", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 batch.py --eplen 200 --hs 100 --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --blossv 0.1 --bent 0.03 --rew 10 --save_every 1000 --rsp 1 --type rnn --da tanh --nbiter 200002 --msize 13 --wp 0.0 --bs 30 --gc 4.0 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: maze/testbatch.py ================================================ import argparse import pdb import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import time import os import platform #import makemaze import numpy as np #import matplotlib.pyplot as plt import glob np.set_printoptions(precision=4) NBDA = 1 # Number of different DA output neurons. 
At present, the code assumes NBDA=1 and will NOT WORK if you change this. np.set_printoptions(precision=4) ADDINPUT = 4 # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs NBACTIONS = 4 # U, D, L, R RFSIZE = 3 # Receptive Field TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS ##ttype = torch.FloatTensor; #ttype = torch.cuda.FloatTensor; ##ttype = torch.FloatTensor; #ttype = torch.cuda.FloatTensor; class Network(nn.Module): def __init__(self, params): super(Network, self).__init__() self.rule = params['rule'] self.type = params['type'] self.softmax= torch.nn.functional.softmax #if params['activ'] == 'tanh': self.activ = F.tanh #elif params['activ'] == 'selu': # self.activ = F.selu #else: # raise ValueError('Must choose an activ function') if params['type'] == 'lstm': self.lstm = torch.nn.LSTM(TOTALNBINPUTS, params['hs']).cuda() elif params['type'] == 'rnn': self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half elif params['type'] == 'modplast' or params['type'] == 'modplast2': self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) #self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True) #self.alpha = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True) self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() 
#self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda() elif params['type'] == 'plastic' : self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half elif params['type'] == 'modul' or params['type'] == 'modul2': self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True) # Note that initial eta is higher (faster) thanbefore self.eta = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta self.etaet = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same etapw self.etapw = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same etapw self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda() # The daweights vectors are weight vectors from the DA output neurons to the network hidden (recurrent) neurons #self.daweights0 = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() #self.daweights0[0, (params['hs'] // 2):] = 0 # no modulation for 2nd half #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half #else: # raise ValueError("Must specify which half of the network receives modulation") 
self.daweights1 = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda() self.daweights1[0, :(params['hs'] // 4)] = 0 self.daweights1[0, -(params['hs'] // 4):] = 0 else: raise ValueError("Which network type?") self.h2o = torch.nn.Linear(params['hs'], NBACTIONS).cuda() self.h2v = torch.nn.Linear(params['hs'], 1).cuda() self.params = params # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions # Each *column* of w targets a single output neuron def forward(self, inputs, hidden, hebb, et, pw): BATCHSIZE = self.params['bs'] HS = self.params['hs'] if self.type == 'rnn': hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul(self.w.view(1, HS, HS), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS) hidden = hactiv #activout = self.softmax(self.h2o(hactiv)) activout = self.h2o(hactiv) # Linear! valueout = self.h2v(hactiv) #valueout = 0 elif self.type == 'plastic': # Each row of w and hebb contains the input weights to a single neuron # hidden = x, hactiv = y hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) if self.rule == 'hebb': deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? elif self.rule == 'oja': deltahebb = torch.mul(hactiv.view(BATCHSIZE, HS, 1), (hidden.view(BATCHSIZE, 1, HS) - torch.mul(self.w.view(1, HS, HS), hactiv.view(BATCHSIZE, HS, 1)))) else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") if self.params['addpw'] == 3: # Note that there is no decay, even in the Hebb-rule case : additive only! 
# Hard clamp hebb = torch.clamp( hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! # Soft clamp hebb = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge. No decay/clamp. hebb = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the normal way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient. # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb = hebb + self.eta * deltahebb hidden = hactiv elif self.type == 'modplast': # The actual network update should be the same as for "plastic". Only the Hebbian updates should be different # The columns of w and pw are the inputs weights to a single neuron hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w.view(HS, HS) + torch.mul(self.alpha.view(HS,HS), hebb))) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... 
if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] elif self.rule == 'oja': deltahebb = torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) if self.params['addpw'] == 3: # Hard clamp, purely additive # Note that we do the same for Hebb and Oja's rule hebb1 = torch.clamp(hebb + DAout[0,0] * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! hebb1 = torch.clamp( hebb + torch.clamp(DAout[0,0] * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(DAout[0,0] * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge # Note that we do the same for Hebb and Oja's rule hebb1 = hebb + DAout[0,0] * deltahebb elif self.params['addpw'] == 0: # We do it the normal way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient by grouping it with the computation of the other (non-modulated) half. # NOTE: This can go awry if DAout can go negative! # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb1 = (1 - DAout[0,0]) * hebb + DAout[0,0] * deltahebb elif self.rule == 'oja': hebb1= hebb + DAout[0,0] * deltahebb else: raise ValueError("Which additive form for plastic weights?") # The non-neuromodulated half of the network just does standard plasticity, using learned self.eta. 
if self.rule == 'hebb': hebb2 = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb2 = hebb + self.eta * deltahebb else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") if self.params['fm'] == 1: hebb = hebb1 elif self.params['fm'] == 0: hebb = torch.cat( (hebb1[:, :self.params['hs']//2], hebb2[:, self.params['hs']//2:]), dim=1) else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv elif self.type == 'modplast_old': #Here we compute the same deltahebb for the whole network, and use #the same addpw for the whole network too. #Only difference between #modulated and non-modulated halves is whether eta is the network's #(learned) eta parameter or the neuromodulator output DAout # The rows of w and hebb are the inputs weights to a single neuron # hidden = x, hactiv = y hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below) if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") # deltahebb has shape BS x HS x HS # Each row of hebb contain the input weights to a neuron if self.rule == 'hebb': deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? 
elif self.rule == 'oja': deltahebb = torch.mul(hactiv.view(BATCHSIZE, HS, 1), (hidden.view(BATCHSIZE, 1, HS) - torch.mul(self.w.view(1, HS, HS), hactiv.view(BATCHSIZE, HS, 1)))) else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") if self.params['addpw'] == 3: # Hard clamp, purely additive # Note that we do the same for Hebb and Oja's rule hebb1 = torch.clamp(hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=-1.0, max=1.0) hebb2 = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! hebb1 = torch.clamp( hebb + torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) hebb2 = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive. This will almost certainly diverge, don't use it! hebb1 = hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb hebb2 = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the old way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient # NOTE: THIS WILL GO AWRY if DAout is allowed to go outside [0,1]! 
# Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb1 = (1 - DAout.view(BATCHSIZE,1,1)) * hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb hebb2 = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb1= hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb hebb2= hebb + self.eta * deltahebb else: raise ValueError("Which additive form for plastic weights?") if self.params['fm'] == 1: hebb = hebb1 elif self.params['fm'] == 0: hebb = torch.cat( (hebb1[:, :self.params['hs']//2, :], hebb2[:, self.params['hs'] // 2:, :]), dim=1) # Maybe along dim=2 instead?... else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv elif self.type == 'modul': # The rows of w and hebb are the inputs weights to a single neuron # hidden = x, hactiv = y hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below) if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") # We need to select the order of operations; network update, e.t. 
update, neuromodulated incorporation into plastic weights # One possibility (for now go with this one): # - computing all outputs from current inputs, including DA # - incorporating neuromodulated Hebb/eligibility trace into plastic weights # - computing updated hebb/eligibility traces # Another possibility (modul2): # - computing all outputs from current inputs, including DA # - computing updated Hebb/eligibility traces # - incorporating this modified Hebb into plastic weights through neuromodulation # For Hebb (not et or pw); this is only used if fm=0, for the non-modulated part of the network # If fm=0: # One half of the network receives neuromodulation. The other just # does plain Hebbian plasticity; note that the eta's for the # Hebbian trace and the eligibility trace are different if self.params['fm']==0: if self.rule == 'hebb': deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? elif self.rule == 'oja': deltahebb = torch.mul(hactiv.view(BATCHSIZE, HS, 1), (hidden.view(BATCHSIZE, 1, HS) - torch.mul(self.w.view(1, HS, HS), hactiv.view(BATCHSIZE, HS, 1)))) else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") # In modul2 we compute deltaet and update et here too; here we compute them later if self.params['addpw'] == 3: # Hard clamp deltapw = DAout.view(BATCHSIZE,1,1) * et pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0) if self.params['fm']==0: hebb = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: deltapw = DAout.view(BATCHSIZE,1,1) * et # This constrains the pw to stay within [-1, 1] (we could also do that by putting a tanh on top of it, but instead we want pw itself to remain within that range, to avoid large gradients and facilitate movement back to 0) # The outer clamp is there for safety. 
In theory the expression within that clamp is "softly" constrained to stay within [-1, 1], but finite-size effects might throw it off. pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw) + torch.clamp(deltapw, max=0.0) * (pw + 1) , min=-.99999, max=.99999) if self.params['fm']==0: hebb = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) if self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge deltapw = DAout.view(BATCHSIZE,1,1) * et pw1 = pw + deltapw if self.params['fm'] == 0: hebb = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the old way, with a decay term. # This will FAIL if DAout is allowed to go outside [0,1] # Note: this makes the plastic weights decaying! pw1 = (1 - DAout.view(BATCHSIZE,1,1)) * pw1 + DAout.view(BATCHSIZE, 1, 1) * et if self.params['fm']==0: if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb= hebb + self.eta * deltahebb # Should we have a fully neuromodulated network, or only half? if self.params['fm'] == 1: pw = pw1 elif self.params['fm'] == 0: pw = torch.cat( (hebb[:, :self.params['hs']//2, :], pw1[:, self.params['hs'] // 2:, :]), dim=1) # Maybe along dim=2 instead?... else: raise ValueError("Must select whether fully modulated or not") # Updating the eligibility trace - always a simple decay term. # Note that self.etaet != self.eta (which is used for hebb, i.e. the non-modulated part) deltaet = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? 
et = (1 - self.etaet) * et + self.etaet * deltaet hidden = hactiv elif self.type == 'modul2': # The rows of w and hebb are the inputs weights to a single neuron # hidden = x, hactiv = y hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below) if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") # We need to select the order of operations; network update, e.t. update, neuromodulated incorporation into plastic weights # One possibility (for now go with this one): # - computing all outputs from current inputs, including DA # - incorporating neuromodulated Hebb/eligibility trace into plastic weights # - computing updated hebb/eligibility traces # Another possibility (modul2): # - computing all outputs from current inputs, including DA # - computing updated Hebb/eligibility traces # - incorporating this modified Hebb into plastic weights through neuromodulation # For Hebb (not et or pw); this is only used if fm=0, for the non-modulated part of the network # If fm=0: # One half of the network receives neuromodulation. The other just # does plain Hebbian plasticity; note that the eta's for the # Hebbian trace and the eligibility trace are different if self.params['fm']==0: if self.rule == 'hebb': deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? 
elif self.rule == 'oja': deltahebb = torch.mul(hactiv.view(BATCHSIZE, HS, 1), (hidden.view(BATCHSIZE, 1, HS) - torch.mul(self.w.view(1, HS, HS), hactiv.view(BATCHSIZE, HS, 1)))) else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") # Updating the eligibility trace - always a simple decay term. # Note that self.etaet != self.eta (which is used for hebb, i.e. the non-modulated part) deltaet = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round? et = (1 - self.etaet) * et + self.etaet * deltaet if self.params['addpw'] == 3: # Hard clamp deltapw = DAout.view(BATCHSIZE,1,1) * et pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0) if self.params['fm']==0: hebb = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: deltapw = DAout.view(BATCHSIZE,1,1) * et # This constrains the pw to stay within [-1, 1] (we could also do that by putting a tanh on top of it, but instead we want pw itself to remain within that range, to avoid large gradients and facilitate movement back to 0) # The outer clamp is there for safety. In theory the expression within that clamp is "softly" constrained to stay within [-1, 1], but finite-size effects might throw it off. pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw) + torch.clamp(deltapw, max=0.0) * (pw + 1) , min=-.99999, max=.99999) if self.params['fm']==0: hebb = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) if self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge deltapw = DAout.view(BATCHSIZE,1,1) * et pw1 = pw + deltapw if self.params['fm'] == 0: hebb = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the old way, with a decay term. # This will FAIL if DAout is allowed to go outside [0,1] # Note: this makes the plastic weights decaying! 
pw1 = (1 - DAout.view(BATCHSIZE,1,1)) * pw1 + DAout.view(BATCHSIZE, 1, 1) * et if fm==0: if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb= hebb + self.eta * deltahebb # Should we have a fully neuromodulated network, or only half? if self.params['fm'] == 1: pw = pw1 elif self.params['fm'] == 0: pw = torch.cat( (hebb[:, :self.params['hs']//2, :], pw1[:, self.params['hs'] // 2:, :]), dim=1) # Maybe along dim=2 instead?... else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv return activout, valueout, hidden, hebb, et, pw def initialZeroHebb(self): #return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda() return Variable(torch.zeros(self.params['hs'], self.params['hs']) , requires_grad=False).cuda() def initialZeroPlasticWeights(self): return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda() def initialZeroState(self): BATCHSIZE = self.params['bs'] return Variable(torch.zeros(BATCHSIZE, self.params['hs']), requires_grad=False ).cuda() def train(paramdict): #params = dict(click.get_current_context().params) #TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS print("Starting training...") params = {} #params.update(defaultParams) params.update(paramdict) print("Passed params: ", params) print(platform.uname()) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode suffix = "btch_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' and pair[0] is not 'pe' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames # 
Initialize random seeds (first two redundant?) print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") net = Network(params) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4, weight_decay=params['l2']) #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr']) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) #LABSIZE = params['lsize'] #lab = np.ones((LABSIZE, LABSIZE)) #CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze #lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) #for row in range(1, LABSIZE - 1): # for col in range(1, LABSIZE - 1): # if row % 2 == 0 and col % 2 == 0: # lab[row, col] = 1 #lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps loclization by introducing a detectable irregularity in the center BATCHSIZE = params['bs'] LABSIZE = params['msize'] lab = np.ones((LABSIZE, LABSIZE)) CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) for row in range(1, LABSIZE - 1): for col in range(1, LABSIZE - 1): if row % 2 == 0 and col % 2 == 0: lab[row, col] = 1 # Not strictly necessary, but cleaner since we start the agent at the # center for each episode; 
may help loclization in some maze sizes # (including 13 and 9, but not 11) by introducing a detectable irregularity # in the center: lab[CTR,CTR] = 0 all_losses = [] all_losses_objective = [] all_total_rewards = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() meanrewards = np.zeros((LABSIZE, LABSIZE)) meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen'])) pos = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() pw = net.initialZeroPlasticWeights() #celoss = torch.nn.CrossEntropyLoss() # For supervised learning - not used here print("Starting episodes!") for numiter in range(params['nbiter']): PRINTTRACE = 0 #if (numiter+1) % (1 + params['pe']) == 0: if (numiter+1) % (params['pe']) == 0: PRINTTRACE = 1 #lab = makemaze.genmaze(size=LABSIZE, nblines=4) #count = np.zeros((LABSIZE, LABSIZE)) # Select the reward location for this episode - not on a wall! # And not on the center either! (though not sure how useful that restriction is...) # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp']) posr = {}; posc = {} rposr = {}; rposc = {} for nb in range(BATCHSIZE): # Note: it doesn't really matter if the reward is on the center. All we need is not to put it on a wall or pillar (lab=1) myrposr = 0; myrposc = 0 # This one is for positioning the reward only in the periphery! 
#while lab[myrposr, myrposc] == 1 or (myrposr != 1 and myrposr != LABSIZE -2 and myrposc != 1 and myrposc != LABSIZE-2): while lab[myrposr, myrposc] == 1 or (myrposr == CTR and myrposc == CTR): myrposr = np.random.randint(1, LABSIZE - 1) myrposc = np.random.randint(1, LABSIZE - 1) rposr[nb] = myrposr; rposc[nb] = myrposc #print("Reward pos:", rposr, rposc) # Agent always starts an episode from the center posc[nb] = CTR posr[nb] = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() et = net.initialZeroHebb() # Eligibility Trace is identical to Hebbian Trace in shape pw = net.initialZeroPlasticWeights() numactionchosen = 0 reward = np.zeros(BATCHSIZE) sumreward = np.zeros(BATCHSIZE) rewards = [] vs = [] logprobs = [] dist = 0 numactionschosen = np.zeros(BATCHSIZE, dtype='int32') #reloctime = np.random.randint(params['eplen'] // 4, (3 * params['eplen']) // 4) #print("EPISODE ", numiter) for numstep in range(params['eplen']): ## We randomly relocate the reward halfway through #if numstep == reloctime: # rposr = 0; rposc = 0 # while lab[rposr, rposc] == 1 or (rposr == CTR and rposc == CTR): # rposr = np.random.randint(1, LABSIZE - 1) # rposc = np.random.randint(1, LABSIZE - 1) inputs = np.zeros((BATCHSIZE, TOTALNBINPUTS), dtype='float32') labg = lab.copy() #labg[rposr, rposc] = -1 # The agent can see the reward if it falls within its RF for nb in range(BATCHSIZE): inputs[nb, 0:RFSIZE * RFSIZE] = labg[posr[nb] - RFSIZE//2:posr[nb] + RFSIZE//2 +1, posc[nb] - RFSIZE //2:posc[nb] + RFSIZE//2 +1].flatten() * 1.0 # Previous chosen action inputs[nb, RFSIZE * RFSIZE +1] = 1.0 # Bias neuron inputs[nb, RFSIZE * RFSIZE +2] = numstep / params['eplen'] #inputs[0, RFSIZE * RFSIZE +3] = 1.0 * reward # Reward from previous time step inputs[nb, RFSIZE * RFSIZE +3] = 1.0 * reward[nb] inputs[nb, RFSIZE * RFSIZE + ADDINPUT + numactionschosen[nb]] = 1 #inputs = 100.0 * inputs # input boosting : Very bad with clamp=0 inputsC = 
torch.from_numpy(inputs).cuda() # Might be better: #if rposr == posr and rposc = posc: # inputs[0][-4] = 100.0 #else: # inputs[0][-4] = 0 # Running the network ## Running the network y, v, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y should output raw scores, not probas # For now: #numactionchosen = np.argmax(y.data[0]) # But wait, this is bad, because the network needs to see the # reward signal to guide its own (within-episode) learning... and # argmax might not provide enough exploration for this! #ee = np.exp(y.data[0].cpu().numpy()) #numactionchosen = np.random.choice(NBNONRESTACTIONS, p = ee / (1e-10 + np.sum(ee))) y = F.softmax(y, dim=1) # Must convert y to probas to use this ! distrib = torch.distributions.Categorical(y) actionschosen = distrib.sample() logprobs.append(distrib.log_prob(actionschosen)) numactionschosen = actionschosen.data.cpu().numpy() # Turn to scalar reward = np.zeros(BATCHSIZE, dtype='float32') #if numiter == 115 and numstep == 99: identical #if numiter == 125 and numstep == 99: diff #if numiter == 120 and numstep == 99: identical #if numiter == 122 and numstep == 99: diff #if numiter == 121 and numstep == 99: identical #if numiter == 122 and numstep == 14: diff (a little, ~1e-3) #if numiter == 122 and numstep == 11: diff (2e-2), rposr,rposc identical, posr different (5 vs 9 for batch) #if numiter == 122 and numstep == 10: identical, rposr 5, rposc 6, posr 5, posc 6 for both #### #if numiter == 730 and numstep == 12: diff #if numiter == 700 and numstep == 12: # diff (not by much.. in the y) #if numiter == 600 and numstep == 12: # identical ... or so I thought? 
#if numiter == 650 and numstep == 12: # diff (1e-6) #if numiter == 625 and numstep == 12: # diff (1e-5) #if numiter == 612 and numstep == 12: # diff (1e-6) #if numiter == 606 and numstep == 12: # diff (1e-7) #if numiter == 603 and numstep == 12: # diff (1e-6) #if numiter == 601 and numstep == 12: # diff #if numiter == 600 and numstep == 99: # diff #if numiter == 600 and numstep == 15: #diff #if numiter == 600 and numstep == 1: #diff #if numiter == 500 and numstep == 1: diff #if numiter == 152 and numstep == 1: identical #if numiter == 352 and numstep == 1: # diff #if numiter == 252 and numstep == 1: # identical! #if numiter == 302 and numstep == 1: # identical! #if numiter == 332 and numstep == 1: # diff #if numiter == 316 and numstep == 1: # diff #if numiter == 309 and numstep == 1: # diff #if numiter == 304 and numstep == 1: # identical #if numiter == 306 and numstep == 1: # identical #if numiter == 308 and numstep == 1: # diff #if numiter == 307 and numstep == 1: # diff #if numiter == 306 and numstep == 51: # diff #if numiter == 306 and numstep == 21: # diff #if numiter == 306 and numstep == 1: # identical (confirm) #if numiter == 306 and numstep == 5: #diff #if numiter == 306 and numstep == 3: # diff #if numiter == 306 and numstep == 2: # identical, rposc rposr 3,4, posc posr 5, 3... hebb noticeably diff! 1e-6; alpha/w identical, h2o(hidden) identical #if numiter == 306 and numstep == 1: h2da(hidden) identical, but h2v(hidden) different! h2o(hidden) identical, hebb different... h2v has identical weights+biases though! hidden identical... # wait, hidden NOT identical - pow(2).sum gives exact same result to 36 decimals, but hidden[0,25] does not! #if numiter == 305 and numstep == 1: # lol, hidden[0,2] is different.... #if numiter == 150 and numstep == 99: # hidden[0,2] different, event though abss().sum(): identical #if numiter == 99 and numstep == 99: # hidden[0,2] identical... 
#if numiter == 101 and numstep == 99: # various components of hidden identical #if numiter == 221 and numstep == 99: # hidden different; the difference seems to be caused by loss.backward/optimizer.step.. and disappears if lossv is commented out?! blossv=0 also removes it! vs[15][0] also different (with blossv=0, so no diff in hidden, and no diff in h2v either!) vs[0][0] identical, vs[1][0] different.....(by ~1e-8) again with blossv=0... if I try with normal blossv but preventing loss.backward/optimizer.step, then I get identical vs[2][0]/vs[-1][0]... if I comment out blossv*lossv addition: hidden identical, vs[-1][0] different, h2DA identical, h2v dot hidden.t() identical... v is identical but the vs are different, how can that be?? if I put the set_trace just after vs.append(v), v[-1][0] is identical, but v[2][0] is not.. and neither is vs[-2][0] !! #if numiter == 221 and numstep == 98, interup just after vs.append(v), blossv=0: now vs[-1][0] also different, as is v... hidden is different too! Confirmed that if you stop at 99 they are identical (How!!!) #if numiter == 121 and numstep == 98: # identical #if numiter == 151 and numstep == 98: # identical #if numiter == 191 and numstep == 98: #identical #if numiter == 208 and numstep == 98: #if numiter == 215 and numstep == 98: #if numiter == 218 and numstep == 98: # all identical #if numiter == 220 and numstep == 98: #h identical, but vs[-]1[0] different! #if numiter == 218 and numstep == 98: #h identical, including vs[-1][0]... but not vs[-21][0] ! vs[2][0] identical though! Lol, all vs identical except vs[-21][0]... #if numiter == 218 and numstep == 77: #h identical,but v different (vs[-1][0] identical. as expect, net.h2v(hidden) different, h2v.weight dot hidden different... but h2v weight/bias have identical abs sum, and so does hidden! torch.matmul(hidden[0,0:14] , net.h2v.weight[0,0:14]) identical, but :15 different! torch.sum(hidden[0,0:15] - net.h2v.weight[0,0:15]) identical... 
but if you replace - with * or +, different! hidden[0,14] is different! lol, hidden.sum() and hidden.abs().sum() are identical, hidden[0,0:].sum()/abs().sum() identical, but hidden[0,0:24].sum() is different! hidden[0,24:].sum() is identical too... BASICALLY, the w's and alphas have several differences in the 1e-9 range; the h2v don't #if numiter == 120 and numstep == 98: # w's are already different... #if numiter == 101 and numstep == 98: #w's identical #if numiter == 102 and numstep == 98: #w's identical #if numiter == 103 and numstep == 98: # # w's differ in the 1e-10 range # pdb.set_trace() # torch.set_printoptions(precision=30) # np.savetxt('a2.txt', all_losses_objective) # p "{:.36f}".format(hidden.abs().sum().data.cpu().numpy()[0]) # Can also give identical results despite some different components?? # BAD - may erase too small differences in individual components (bc of squaring) p "{:.36f}".format(net.h2DA(hidden).pow(2).sum().data.cpu().numpy()[0]) # p "{:.36f}".format(hidden[0,2].data.cpu().numpy()[0]) # p "{:.36f}".format(vs[-1][0].data.cpu().numpy()[0]) for nb in range(BATCHSIZE): myreward = 0 numactionchosen = numactionschosen[nb] tgtposc = posc[nb] tgtposr = posr[nb] if numactionchosen == 0: # Up tgtposr -= 1 elif numactionchosen == 1: # Down tgtposr += 1 elif numactionchosen == 2: # Left tgtposc -= 1 elif numactionchosen == 3: # Right tgtposc += 1 else: raise ValueError("Wrong Action") reward[nb] = 0.0 # The reward for this step if lab[tgtposr][tgtposc] == 1: reward[nb] -= params['wp'] else: #dist += 1 posc[nb] = tgtposc posr[nb] = tgtposr # Did we hit the reward location ? Increase reward and teleport! 
# Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move if rposr[nb] == posr[nb] and rposc[nb] == posc[nb]: reward[nb] += params['rew'] posr[nb]= np.random.randint(1, LABSIZE - 1) posc[nb] = np.random.randint(1, LABSIZE - 1) while lab[posr[nb], posc[nb]] == 1 or (rposr[nb] == posr[nb] and rposc[nb] == posc[nb]): posr[nb] = np.random.randint(1, LABSIZE - 1) posc[nb] = np.random.randint(1, LABSIZE - 1) rewards.append(reward) vs.append(v) sumreward += reward #loss += ( params['bent'] * y.pow(2).sum() / BATCHSIZE ) # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution. Note: .2 may be too strong, .04 may be too weak. loss += params['bent'] * y.pow(2).sum() # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution. Note: .2 may be too strong, .04 may be too weak. #lossentmean = .99 * lossentmean + .01 * ( params['bent'] * y.pow(2).sum() / BATCHSIZE ).data[0] # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution. Note: .2 may be too strong, .04 may be too weak. 
if PRINTTRACE: #print("Step ", numstep, "- GI: ", goodinputs, ", GA: ", goodaction, " Inputs: ", inputsN, " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen, # " - inputsthisstep:", inputsthisstep, " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Rew: ", reward) print("Step ", numstep, " Inputs (to 1st in batch): ", inputs[0, :TOTALNBINPUTS], " - Outputs(1st in batch): ", y[0].data.cpu().numpy(), " - action chosen(1st in batch): ", numactionschosen[0], " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Reward (this step, 1st in batch): ", reward[0]) # Episode is done, now let's do the actual computations #R = Variable(torch.zeros(BATCHSIZE).cuda(), requires_grad=False) R = 0 gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : #R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False) #R = gammaR * R + float(rewards[numstepb][0]) #ctrR = R - vs[numstepb][0] #lossv += ctrR.pow(2).sum() / BATCHSIZE #loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BATCHSIZE # Need to check if detach() is OK R = gammaR * R + float(rewards[numstepb][0]) lossv += (vs[numstepb][0] - R).pow(2) loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... 
put it b/c of worry about weird gradient flows #elif params['algo'] == 'REI': # R = sumreward # baseline = meanrewards[rposr, rposc] # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * (R - baseline) #elif params['algo'] == 'REINOB': # R = sumreward # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * R #elif params['algo'] == 'REITMP': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * R #elif params['algo'] == 'REITMPB': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * (R - meanrewardstmp[rposr, rposc, numstepb]) #else: # raise ValueError("Which algo?") #meanrewards[rposr, rposc] = (1.0 - params['nu']) * meanrewards[rposr, rposc] + params['nu'] * sumreward #R = 0 #for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # meanrewardstmp[rposr, rposc, numstepb] = (1.0 - params['nu']) * meanrewardstmp[rposr, rposc, numstepb] + params['nu'] * R loss += params['blossv'] * lossv loss /= params['eplen'] if PRINTTRACE: if True: #params['algo'] == 'A3C': print("lossv: ", lossv.data.cpu().numpy()[0]) print ("Total reward for this episode (all):", sumreward, "Dist:", dist) #if params['squash'] == 1: # if sumreward < 0: # sumreward = -np.sqrt(-sumreward) # else: # sumreward = np.sqrt(sumreward) #elif params['squash'] == 0: # pass #else: # raise ValueError("Incorrect value for squash parameter") #loss *= sumreward #if numiter == 102 : # w identical, final hidden identical... but loss slightly different! as is lossv (shouldnt matter since its addition to loss is commented out). vs apparently identical #if numiter == 182 : # identical loss (after fixing the rewards computations) #if numiter == 202 : # identical loss (but loss_between_saves different, which means some losses different?...) 
#if numiter == 222 : # loss is different #if numiter == 33 : if numiter == 101 : pdb.set_trace() #for p in net.parameters(): # p.grad.data.clamp_(-params['clp'], params['clp']) if numiter > 100: # Burn-in period for meanrewards loss.backward() optimizer.step() #torch.cuda.empty_cache() #print(sumreward) lossnum = loss.data[0] lossbetweensaves += lossnum all_losses_objective.append(lossnum) all_total_rewards.append(sumreward.mean()) #all_losses_v.append(lossv.data[0]) #total_loss += lossnum if (numiter+1) % params['pe'] == 0: np.savetxt('a1.txt', all_losses_objective) print(numiter, "====") print("Mean loss: ", lossbetweensaves / params['pe']) lossbetweensaves = 0 print("Mean reward (across batch and last", params['pe'], "eps.): ", np.sum(all_total_rewards[-params['pe']:])/ params['pe']) #print("Mean reward (across batch): ", sumreward.mean()) previoustime = nowtime nowtime = time.time() print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", net.eta.data.cpu().numpy(), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'modul': print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy(), " mean-abs pw: ", np.mean(np.abs(pw.data.cpu().numpy()))) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numiter+1) % params['save_every'] == 0: print("Saving files...") # lossbetweensaves /= params['save_every'] # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) # print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # # Instability detection; necessary for SELUs, which seem to be divergence-prone # # Note that if we are unlucky enough to have 
diverged within the last 100 timesteps, this may not save us. # if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) # else: print("Saving local files...") #with open('params_'+suffix+'.dat', 'wb') as fo: # #pickle.dump(net.w.data.cpu().numpy(), fo) # #pickle.dump(net.alpha.data.cpu().numpy(), fo) # #pickle.dump(net.eta.data.cpu().numpy(), fo) # #pickle.dump(all_losses, fo) # pickle.dump(params, fo) #with open('loss_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_objective: # thefile.write("%s\n" % item) #with open('lossv_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_v: # thefile.write("%s\n" % item) with open('loss_'+suffix+'.txt', 'w') as thefile: for item in all_total_rewards[::10]: thefile.write("%s\n" % item) torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat') with open('params_'+suffix+'.dat', 'wb') as fo: pickle.dump(params, fo) if os.path.isdir('/mnt/share/tmiconi'): print("Transferring to NFS storage...") for fn in ['params_'+suffix+'.dat', 'loss_'+suffix+'.txt', 'torchmodel_'+suffix+'.dat']: result = os.system( 'cp {} {}'.format(fn, '/mnt/share/tmiconi/modulmaze/'+fn)) print("Done!") # lossbetweensavesprev = lossbetweensaves # lossbetweensaves = 0 # sys.stdout.flush() # sys.stderr.flush() if __name__ == "__main__": #defaultParams = { # 'type' : 'lstm', # 'seqlen' : 200, # 'hs': 500, # 'activ': 'tanh', # 'steplr': 10e9, # By default, no change in the learning rate # 'gamma': .5, # The annealing factor of learning rate decay for Adam # 'imagesize': 31, # 'nbiter': 30000, # 'lr': 1e-4, # 'test_every': 10, # 'save_every': 3000, # 'rngseed':0 #} parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) #parser.add_argument("--wp", type=float, help="wall 
penalty (reward decrement for hitting a wall)", default=0.1)
    # Reward shaping: magnitude of the reward for reaching the goal location,
    # and the penalty applied when the agent tries to walk into a wall.
    parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0)
    parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.05)
    #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2)
    #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0)
    # Loss-mixing coefficients: 'bent' scales the exploration (anti-concentration)
    # bonus, 'blossv' scales the value-prediction (critic) loss.
    parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03)
    #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0)
    parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7)
    #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0)
    #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0)
    #parser.add_argument("--nbarms", type=int, help="number of arms", default=2)
    #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3)
    #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh')
    #parser.add_argument("--algo", help="meta-learning algorithm (A3C or REI)", default='A3C')
    # Architecture / plasticity options.
    parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb')
    parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='modul')
    parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9)
    parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh')
    parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9)
    parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4)
    parser.add_argument("--fm", type=int, help="if using neuromodulation, do we modulate the whole network (1) or just half (0) ?", default=1)
    #parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1)
    #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0)
    #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4)
    #parser.add_argument("--modhalf", type=int, help="which half of the recurrent netowkr receives modulation (1 or 2)", default=1)
    #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4)
    parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1)
    parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=1)
    #parser.add_argument("--clp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0)
    #parser.add_argument("--md", type=int, help="maximum delay for reward reception", default=0)
    # Episode / training-loop sizes.
    parser.add_argument("--eplen", type=int, help="length of episodes", default=100)
    #parser.add_argument("--exptime", type=int, help="exploration (no reward) time (must be < eplen)", default=0)
    parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100)
    parser.add_argument("--bs", type=int, help="batch size", default=1)
    parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=3e-6)
    #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000)
#parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=1000) parser.add_argument("--pe", type=int, help="number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: maze/testnobatch.py ================================================ import argparse import pdb import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import time import os import platform #import makemaze import numpy as np #import matplotlib.pyplot as plt import glob np.set_printoptions(precision=4) NBDA = 1 np.set_printoptions(precision=4) ADDINPUT = 4 # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs NBACTIONS = 4 # U, D, L, R RFSIZE = 3 # Receptive Field TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS ##ttype = torch.FloatTensor; #ttype = torch.cuda.FloatTensor; ##ttype = torch.FloatTensor; #ttype = torch.cuda.FloatTensor; class Network(nn.Module): def __init__(self, params): super(Network, self).__init__() self.rule = params['rule'] self.type = params['type'] self.softmax= torch.nn.functional.softmax #if params['activ'] == 'tanh': self.activ = F.tanh #elif params['activ'] == 'selu': # self.activ = F.selu #else: # raise ValueError('Must choose an activ function') if params['type'] == 'lstm': self.lstm = torch.nn.LSTM(TOTALNBINPUTS, params['hs']).cuda() elif 
params['type'] == 'rnn':
            # Plain RNN: fixed (non-plastic) recurrent weight matrix w.
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half
        elif params['type'] == 'modplast' or params['type'] == 'modplast2':
            # Plastic RNN with a learned neuromodulatory (DA) output head h2DA
            # that can gate the Hebbian updates (see forward()).
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
            #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half
            self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
            self.DAoutV = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
        elif params['type'] == 'plastic' :
            # Differentiable-plasticity RNN: w is the fixed component, alpha
            # scales the per-connection Hebbian trace, eta is the shared
            # plasticity learning rate.
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
            #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half
        elif params['type'] == 'modul' or params['type'] == 'modul2':
            # Modulated-plasticity network: adds separate learned rates for the
            # eligibility trace (etaet) and plastic weights (etapw).
            self.i2h = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            # Note that initial eta is higher (faster) than before
            self.eta = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
            self.etaet = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same etaet
            self.etapw = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same etapw
            self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
            # The daweights vectors are weight vectors from the DA output neurons to the network hidden (recurrent) neurons
            #self.daweights0 = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            #self.daweights0[0, (params['hs'] // 2):] = 0 # no modulation for 2nd half
            #self.inputnegmask = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            #self.inputnegmask[0, :TOTALNBINPUTS] = 0 # no modulation for 2nd half
            #else:
            #    raise ValueError("Must specify which half of the network receives modulation")
            # daweights1 zeroes the first and last quarter of the hidden layer
            # -- presumably so only the middle half receives DA modulation;
            # confirm against its use in forward().
            self.daweights1 = Variable(torch.ones(1, params['hs']), requires_grad=False).cuda()
            self.daweights1[0, :(params['hs'] // 4)] = 0
            self.daweights1[0, -(params['hs'] // 4):] = 0
        elif params['type'] == 'lstmplastic':
            # Hand-written LSTM with plasticity in the recurrent h->c pathway
            # (w/alpha/eta replace the commented-out fixed h2c layer).
            self.h2f = torch.nn.Linear(params['hs'], params['hs']).cuda()
            self.h2i = torch.nn.Linear(params['hs'], params['hs']).cuda()
            self.h2opt = torch.nn.Linear(params['hs'], params['hs']).cuda()
            # Plasticity in the recurrent connections, h to c:
            #self.h2c = torch.nn.Linear(params['hs'], params['hs']).cuda()
            self.w = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.alpha = torch.nn.Parameter((.01 * torch.rand(params['hs'], params['hs'])).cuda(), requires_grad=True)
            self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
            self.x2f = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.x2opt = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.x2i = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda()
            self.x2c =
torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() elif params['type'] == 'lstmmanual': self.h2f = torch.nn.Linear(params['hs'], params['hs']).cuda() self.h2i = torch.nn.Linear(params['hs'], params['hs']).cuda() self.h2opt = torch.nn.Linear(params['hs'], params['hs']).cuda() self.h2c = torch.nn.Linear(params['hs'], params['hs']).cuda() self.x2f = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.x2opt = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.x2i = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() self.x2c = torch.nn.Linear(TOTALNBINPUTS, params['hs']).cuda() ##fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0])) ##ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0])) ##opt = F.sigmoid(self.x2o(inputs) + self.h2o(hidden[0])) ##cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0]))) ##h = torch.mul(opt, cell) ##hidden = (h, cell) else: raise ValueError("Which network type?") self.h2o = torch.nn.Linear(params['hs'], NBACTIONS).cuda() self.h2v = torch.nn.Linear(params['hs'], 1).cuda() self.params = params # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions # Each *column* of w targets a single output neuron def forward(self, inputs, hidden, hebb, et, pw): if self.type == 'lstm': hactiv, hidden = self.lstm(inputs.view(1, 1, -1), hidden) # hactiv is just the h. hidden is the h and the cell state, in a tuple hactiv = hactiv[0] activout = self.softmax(self.h2o(hactiv)) valueout = self.h2v(hactiv) #pdb.set_trace() #hactiv = hactiv.view(1, -1) # Apparently this was causing memory leaks?..... # Draft for a "manual" lstm: elif self.type== 'lstmmanual': # hidden[0] is the previous h state. 
hidden[1] is the previous c state fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0])) ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0])) opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0])) cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0]))) hactiv = torch.mul(opt, F.tanh(cell)) #pdb.set_trace() hidden = (hactiv, cell) activout = self.softmax(self.h2o(hactiv)) valueout = self.h2v(hactiv) if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) : raise ValueError("Nan detected !") elif self.type== 'lstmplastic': fgt = F.sigmoid(self.x2f(inputs) + self.h2f(hidden[0])) ipt = F.sigmoid(self.x2i(inputs) + self.h2i(hidden[0])) opt = F.sigmoid(self.x2opt(inputs) + self.h2opt(hidden[0])) #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + self.h2c(hidden[0]))) #Need to think what the inputs and outputs should be for the #plasticity. It might be worth introducing an additional stage #consisting of whatever is multiplied by ift and then added to the #cell state, rather than the full cell state.... But we can #experiment both! #cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, F.tanh(self.x2c(inputs) + hidden[0].mm(self.w + torch.mul(self.alpha, hebb)))) # self.h2c(hidden[0]))) inputstocell = F.tanh(self.x2c(inputs) + hidden[0].mm(self.w + torch.mul(self.alpha, hebb))) cell = torch.mul(fgt, hidden[1]) + torch.mul(ipt, inputstocell) # self.h2c(hidden[0]))) if self.rule == 'hebb': raise ValueError("Not yet implemented!") #hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden[0].unsqueeze(2), cell.unsqueeze(1))[0] hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden[0].unsqueeze(2), inputstocell.unsqueeze(1))[0] elif self.rule == 'oja': raise ValueError("Not yet implemented!") # NOTE: NOT SURE ABOUT THE OJA VERSION !! 
hebb = hebb + self.eta * torch.mul((hidden[0][0].unsqueeze(1) - torch.mul(hebb , inputstocell[0].unsqueeze(0))) , inputstocell[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! #hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! hactiv = torch.mul(opt, F.tanh(cell)) #pdb.set_trace() hidden = (hactiv, cell) if np.isnan(np.sum(hactiv.data.cpu().numpy())) or np.isnan(np.sum(hidden[1].data.cpu().numpy())) : raise ValueError("Nan detected !") activout = self.softmax(self.h2o(hactiv)) valueout = self.h2v(hactiv) elif self.type == 'rnn': if self.params['clp'] == 0: hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w)) elif self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w)) #elif self.params['clp'] == 2: # hidden = self.inputnegmask.mul(hidden) + inputs # hactiv = self.activ(hidden.mm(self.w)) # hactiv = self.inputnegmask.mul(hactiv) + inputs hidden = hactiv #activout = self.softmax(self.h2o(hactiv)) activout = self.h2o(hactiv) # Linear! valueout = self.h2v(hactiv) #valueout = 0 elif self.type == 'plastic_prev': # The columns of w and pw are the inputs weights to a single neuron if self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, hebb))) elif self.params['clp'] == 0: # No clamping, input layer hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, hebb))) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] elif self.rule == 'oja': hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. 
Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") hidden = hactiv elif self.type == 'plastic': # The columns of w and pw are the inputs weights to a single neuron (?) if self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, hebb))) elif self.params['clp'] == 0: # No clamping, input layer hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, hebb))) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] elif self.rule == 'oja': deltahebb = torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) if self.params['addpw'] == 3: # Note that there is no decay, even in the Hebb-rule case : additive only! # Hard clamp hebb = torch.clamp( hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! # Soft clamp hebb = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge. No decay/clamp. hebb = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the normal way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient. # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb = hebb + self.eta * deltahebb hidden = hactiv elif self.type == 'modplast2': #Here we compute the same deltahebb for the whole network, and use #the same addpw for the whole network too. 
#Only difference between #modulated and non-modulated halves is whether eta is the network's #(learned) eta parameter or the neuromodulator output DAout # The columns of w and pw are the inputs weights to a single neuron if self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, hebb))) else: # No clamping, input layer hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, hebb))) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] elif self.rule == 'oja': deltahebb = torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) if self.params['addpw'] == 3: # Hard clamp, purely additive # Note that we do the same for Hebb and Oja's rule hebb1 = torch.clamp(hebb + DAout[0,0] * deltahebb, min=-1.0, max=1.0) hebb2 = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! hebb1 = torch.clamp( hebb + torch.clamp(DAout[0,0] * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(DAout[0,0] * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) hebb2 = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive. This will almost certainly diverge, don't use it! 
hebb1 = hebb + DAout[0,0] * deltahebb hebb2 = hebb + self.eta * deltahebb elif self.params['addpw'] == 0: # We do it the normal way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient # Note: This can go awry if DAout can go negative or outside [0,1]! # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb1 = (1 - DAout[0,0]) * hebb + DAout[0,0] * deltahebb hebb2 = (1 - self.eta) * hebb + DAout[0,0] * deltahebb elif self.rule == 'oja': hebb1= hebb + DAout[0,0] * deltahebb hebb2= hebb + self.eta * deltahebb else: raise ValueError("Which additive form for plastic weights?") if self.params['fm'] == 1: hebb = hebb1 elif self.params['fm'] == 0: hebb = torch.cat( (hebb1[:, :self.params['hs']//2], hebb2[:, self.params['hs']//2:]), dim=1) else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv elif self.type == 'modplast': # The actual network update should be the same as for "plastic". Only the Hebbian updates should be different # The columns of w and pw are the inputs weights to a single neuron if self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, hebb))) else: # No clamping, input layer hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, hebb))) activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) # Now computing the Hebbian updates... 
if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] elif self.rule == 'oja': deltahebb = torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) if self.params['addpw'] == 3: # Hard clamp, purely additive # Note that we do the same for Hebb and Oja's rule hebb1 = torch.clamp(hebb + DAout[0,0] * deltahebb, min=-1.0, max=1.0) elif self.params['addpw'] == 2: # Note that there is no decay, even in the Hebb-rule case : additive only! hebb1 = torch.clamp( hebb + torch.clamp(DAout[0,0] * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(DAout[0,0] * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0) elif self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge # Note that we do the same for Hebb and Oja's rule hebb1 = hebb + DAout[0,0] * deltahebb elif self.params['addpw'] == 0: # We do it the normal way. Note that here, Hebb-rule is decaying. # There is probably a way to make it more efficient by grouping it with the computation of the other (non-modulated) half. # NOTE: This can go awry if DAout can go negative! # Note 2: For Oja's rule, there is no difference between addpw 0 and addpw1 if self.rule == 'hebb': hebb1 = (1 - DAout[0,0]) * hebb + DAout[0,0] * deltahebb elif self.rule == 'oja': hebb1= hebb + DAout[0,0] * deltahebb else: raise ValueError("Which additive form for plastic weights?") # The non-neuromodulated half of the network just does standard plasticity, using learned self.eta. 
if self.rule == 'hebb': hebb2 = (1 - self.eta) * hebb + self.eta * deltahebb elif self.rule == 'oja': hebb2 = hebb + self.eta * deltahebb else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") if self.params['fm'] == 1: hebb = hebb1 elif self.params['fm'] == 0: hebb = torch.cat( (hebb1[:, :self.params['hs']//2], hebb2[:, self.params['hs']//2:]), dim=1) else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv elif self.type == 'modul': # One half of the network receives neuromodulation. The other just # does plain Hebbian plasticity; note that the eta's for the # Hebbian trace and the eligibility trace are different # We need to select the order of operations; network update, hebb update, neuromodulated incorporation into stable plastic weights # One possibility (for now go with this one): # - computing all outputs from current inputs, including DA # - incorporating neuromodulated Hebb/eligibility trace into plastic weights # - computing updated hebb # Another possibility: # - computing all outputs from current inputs, including DA # - computing updated Hebb # - incorporating this modified Hebb into plastic weights through neuromodulation # The columns of w and pw are the inputs weights to a single neuron if self.params['clp'] == 0: hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, pw))) elif self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, pw))) #else: activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) #valueout = 0 if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") if self.params['addpw'] == 3: # Hard clamp deltapw = DAout[0,0] * et pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0) #if 
self.params['addpw'] == 3: # # Constrained AND cubed: This makes the soft bounds "softer", so the values can come closer to -1 and 1. # # Absolutely no difference in performance from addpw=2 ! # deltapw = DAout[0,0] * et # pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw ** 3) + torch.clamp(deltapw, max=0.0) * (pw ** 3 + 1) , min=-1.0, max=1.0) # #if np.random.rand() < .05: # # pdb.set_trace() elif self.params['addpw'] == 2: deltapw = DAout[0,0] * et # This constrains the pw to stay within [-1, 1] (we could do that by putting a tanh on top of it, but we want pw itself to remain within that range to avoid large gradients) # The outer clamp is there for safety. In theory the expression within that clamp is "softly" constrained to stay within [-1, 1], but finite-size effects might throw it off. # Note that cubing pw in the boundary terms below would make the bounds "softer" and allow a wider range, but in practice it makes no difference in performance. pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw) + torch.clamp(deltapw, max=0.0) * (pw + 1) , min=-.99999, max=.99999) #if np.random.rand() < .05: # pdb.set_trace() if self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge deltapw = DAout[0,0] * et pw1 = pw + deltapw elif self.params['addpw'] == 0: # Problem: this makes the plastic weights decaying! pw1 = pw - torch.abs(self.etapw) * pw + self.etapw * DAout[0,0] * et # Should we have a fully neuromodulated network, or only half? if self.params['fm'] == 1: pw = pw1 elif self.params['fm'] == 0: pw = torch.cat( (hebb[:, :self.params['hs']//2], pw1[:, self.params['hs'] // 2:]), dim=1) # Use output argument? else: raise ValueError("Must select whether fully modulated or not") # Note that the 'hebb' variable is only for the non-modulated part, # which is only used if params['fm'] == 0; also, hebb can be # updated Oja or decaying, but et is always decaying. 
if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] hebb = (1 - self.eta) * hebb + self.eta * deltahebb et = (1 - self.etaet) * et + self.etaet * deltahebb elif self.rule == 'oja': #raise ValueError("Not yet implemented!") hebb = hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] et = (1 - self.etaet) * et + self.etaet * deltahebb else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") hidden = hactiv #if np.random.rand() < .05: # pdb.set_trace() elif self.type == 'modul2': # Here we try the other order: # - computing all outputs from current inputs, including DA # - computing updated Hebb # - incorporating this modified Hebb into plastic weights through neuromodulation # The columns of w and pw are the inputs weights to a single neuron if self.params['clp'] == 0: hactiv = self.activ(self.i2h(inputs) + hidden.mm(self.w + torch.mul(self.alpha, pw))) elif self.params['clp'] == 1: hactiv = self.activ(inputs + hidden.mm(self.w + torch.mul(self.alpha, pw))) #else: activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later valueout = self.h2v(hactiv) #valueout = 0 if self.params['da'] == 'tanh': DAout = F.tanh(self.h2DA(hactiv)) elif self.params['da'] == 'sig': DAout = F.sigmoid(self.h2DA(hactiv)) elif self.params['da'] == 'lin': DAout = self.h2DA(hactiv) else: raise ValueError("Which transformation for DAout ?") # Updating ET before PW (note: 'hebb' variable is only for the non-modulated part of the network) if self.rule == 'hebb': deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] hebb = (1 - self.eta) * hebb + self.eta * deltahebb et = (1 - self.etaet) * et + self.etaet * deltahebb elif self.rule == 'oja': #raise ValueError("Not yet implemented!") hebb = 
hebb + self.eta * torch.mul((hidden[0].unsqueeze(1) - torch.mul(hebb , hactiv[0].unsqueeze(0))) , hactiv[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! deltahebb = torch.bmm(hidden.unsqueeze(2), hactiv.unsqueeze(1))[0] et = (1 - self.etaet) * et + self.etaet * deltahebb else: raise ValueError("Must specify learning rule ('hebb' or 'oja')") if self.params['addpw'] == 3: # Hard clamp deltapw = DAout[0,0] * et pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0) #if self.params['addpw'] == 3: # # Constrained AND cubed: This makes the soft bounds "softer", so the values can come closer to -1 and 1. # # Absolutely no difference in performance from addpw=2 ! # deltapw = DAout[0,0] * et # pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw ** 3) + torch.clamp(deltapw, max=0.0) * (pw ** 3 + 1) , min=-1.0, max=1.0) # #if np.random.rand() < .05: # # pdb.set_trace() elif self.params['addpw'] == 2: deltapw = DAout[0,0] * et # This constrains the pw to stay within [-1, 1] (we could do that by putting a tanh on top of it, but we want pw itself to remain within that range to avoid large gradients) # The outer clamp is there for safety. In theory the expression within that clamp is "softly" constrained to stay within [-1, 1], but finite-size effects might throw it off. # Note that cubing pw in the boundary terms below would make the bounds "softer" and allow a wider range, but in practice it makes no difference in performance. pw1 = torch.clamp( pw + torch.clamp(deltapw, min=0.0) * (1 - pw) + torch.clamp(deltapw, max=0.0) * (pw + 1) , min=-.99999, max=.99999) #if np.random.rand() < .05: # pdb.set_trace() if self.params['addpw'] == 1: # Purely additive, tends to make the meta-learning diverge deltapw = DAout[0,0] * et pw1 = pw + deltapw elif self.params['addpw'] == 0: # Problem: this makes the plastic weights decaying! 
pw1 = pw - torch.abs(self.etapw) * pw + self.etapw * DAout[0,0] * et # Should we have a fully neuromodulated network, or only half? if self.params['fm'] == 1: pw = pw1 elif self.params['fm'] == 0: pw = torch.cat( (hebb[:, :self.params['hs']//2], pw1[:, self.params['hs'] // 2:]), dim=1) # Use output argument? else: raise ValueError("Must select whether fully modulated or not") hidden = hactiv #if np.random.rand() < .05: # pdb.set_trace() return activout, valueout, hidden, hebb, et, pw def initialZeroHebb(self): return Variable(torch.zeros(self.params['hs'], self.params['hs']) , requires_grad=False).cuda() def initialZeroPlasticWeights(self): return Variable(torch.zeros(self.params['hs'], self.params['hs']) , requires_grad=False).cuda() def initialZeroState(self): if self.params['type'] == 'lstm': return (Variable(torch.zeros(1, 1, self.params['hs']), requires_grad=False).cuda() , Variable(torch.zeros(1, 1, self.params['hs']), requires_grad=False ).cuda() ) elif self.params['type'] == 'lstmmanual' or self.params['type'] == 'lstmplastic': return (Variable(torch.zeros(1, self.params['hs']), requires_grad=False).cuda() , Variable(torch.zeros(1, self.params['hs']), requires_grad=False ).cuda() ) elif self.params['type'] == 'rnn' or self.params['type'] == 'plastic' or self.params['type'] == 'modul' or self.params['type'] == 'modul2' or self.params['type'] == 'modplast' or self.params['type'] == 'modplast2': return Variable(torch.zeros(1, self.params['hs']), requires_grad=False ).cuda() def train(paramdict): #params = dict(click.get_current_context().params) #TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS print("Starting training...") params = {} #params.update(defaultParams) params.update(paramdict) print("Passed params: ", params) print(platform.uname()) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode suffix = 
"maz_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' and pair[0] is not 'print_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames # Initialize random seeds (first two redundant?) print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") net = Network(params) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4, weight_decay=params['l2']) #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr']) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) #LABSIZE = params['lsize'] #lab = np.ones((LABSIZE, LABSIZE)) #CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze #lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) #for row in range(1, LABSIZE - 1): # for col in range(1, LABSIZE - 1): # if row % 2 == 0 and col % 2 == 0: # lab[row, col] = 1 #lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps loclization by introducing a detectable irregularity in the center LABSIZE = params['msize'] lab = np.ones((LABSIZE, LABSIZE)) CTR = LABSIZE // 2 # Simple cross maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, CTR] = 0 # Double-T 
maze #lab[CTR, 1:LABSIZE-1] = 0 #lab[1:LABSIZE-1, 1] = 0 #lab[1:LABSIZE-1, LABSIZE - 2] = 0 # Grid maze lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) for row in range(1, LABSIZE - 1): for col in range(1, LABSIZE - 1): if row % 2 == 0 and col % 2 == 0: lab[row, col] = 1 # Not strictly necessary, but cleaner since we start the agent at the # center for each episode; may help loclization in some maze sizes # (including 13 and 9, but not 11) by introducing a detectable irregularity # in the center: lab[CTR,CTR] = 0 all_losses = [] all_losses_objective = [] all_total_rewards = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() meanrewards = np.zeros((LABSIZE, LABSIZE)) meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen'])) pos = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() pw = net.initialZeroPlasticWeights() #celoss = torch.nn.CrossEntropyLoss() # For supervised learning - not used here print("Starting episodes!") for numiter in range(params['nbiter']): PRINTTRACE = 0 #if (numiter+1) % (1 + params['print_every']) == 0: if (numiter+1) % (params['print_every']) == 0: PRINTTRACE = 1 #lab = makemaze.genmaze(size=LABSIZE, nblines=4) #count = np.zeros((LABSIZE, LABSIZE)) # Select the reward location for this episode - not on a wall! # And not on the center either! (though not sure how useful that restriction is...) 
rposr = 0; rposc = 0 while lab[rposr, rposc] == 1 or (rposr == CTR and rposc == CTR): rposr = np.random.randint(1, LABSIZE - 1) rposc = np.random.randint(1, LABSIZE - 1) # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp']) posc = CTR posr = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() hebb = net.initialZeroHebb() et = net.initialZeroHebb() # Eligibility Trace is identical to Hebbian Trace in shape pw = net.initialZeroPlasticWeights() numactionchosen = 0 reward = 0.0 rewards = [] vs = [] logprobs = [] sumreward = 0.0 dist = 0 rewarddelay = -1 rewardpercep = 0 #reloctime = np.random.randint(params['eplen'] // 4, (3 * params['eplen']) // 4) #print("EPISODE ", numiter) for numstep in range(params['eplen']): ## We randomly relocate the reward halfway through #if numstep == reloctime: # rposr = 0; rposc = 0 # while lab[rposr, rposc] == 1 or (rposr == CTR and rposc == CTR): # rposr = np.random.randint(1, LABSIZE - 1) # rposc = np.random.randint(1, LABSIZE - 1) if params['clp'] == 0: inputs = np.zeros((1, TOTALNBINPUTS), dtype='float32') else: inputs = np.zeros((1, params['hs']), dtype='float32') labg = lab.copy() #labg[rposr, rposc] = -1 # The agent can see the reward if it falls within its RF inputs[0, 0:RFSIZE * RFSIZE] = labg[posr - RFSIZE//2:posr + RFSIZE//2 +1, posc - RFSIZE //2:posc + RFSIZE//2 +1].flatten() * 1.0 # Previous chosen action inputs[0, RFSIZE * RFSIZE +1] = 1.0 # Bias neuron inputs[0, RFSIZE * RFSIZE +2] = numstep / params['eplen'] #inputs[0, RFSIZE * RFSIZE +3] = 1.0 * reward # Reward from previous time step inputs[0, RFSIZE * RFSIZE +3] = 1.0 * rewardpercep inputs[0, RFSIZE * RFSIZE + ADDINPUT + numactionchosen] = 1 #inputs = 100.0 * inputs # input boosting : Very bad with clamp=0 inputsC = torch.from_numpy(inputs).cuda() # Might be better: #if rposr == posr and rposc = posc: # inputs[0][-4] = 100.0 #else: # inputs[0][-4] 
= 0 # Running the network ## Running the network y, v, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y should output raw scores, not probas # For now: #numactionchosen = np.argmax(y.data[0]) # But wait, this is bad, because the network needs to see the # reward signal to guide its own (within-episode) learning... and # argmax might not provide enough exploration for this! #ee = np.exp(y.data[0].cpu().numpy()) #numactionchosen = np.random.choice(NBNONRESTACTIONS, p = ee / (1e-10 + np.sum(ee))) y = F.softmax(y, dim=1) # Must convert y to probas to use this ! distrib = torch.distributions.Categorical(y) actionchosen = distrib.sample() # sample() returns a Pytorch tensor of size 1; this is needed for the backprop below numactionchosen = actionchosen.data[0] # Turn to scalar #if numiter == 103 and numstep == 98: # pdb.set_trace() tgtposc = posc tgtposr = posr if numactionchosen == 0: # Up tgtposr -= 1 elif numactionchosen == 1: # Down tgtposr += 1 elif numactionchosen == 2: # Left tgtposc -= 1 elif numactionchosen == 3: # Right tgtposc += 1 else: raise ValueError("Wrong Action") reward = 0.0 if lab[tgtposr][tgtposc] == 1: # Hit wall! reward = -params['wp'] else: dist += 1 posc = tgtposc posr = tgtposr # Did we hit the reward location ? if rposr == posr and rposc == posc: reward += params['rew'] if params['rsp'] == 1: posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) while lab[posr, posc] == 1 or (rposr == posr and rposc == posc): posr = np.random.randint(1, LABSIZE - 1) posc = np.random.randint(1, LABSIZE - 1) else: posr = CTR posc = CTR rewardpercep = reward # This is with reward delay. Not necessarily buggy, but it causes some divergences w/ batch due to the reward counter for not detecting rewards # if rewarddelay < 0: # Make sure that the reward delay counter is not active. NOTE: this can cause weirdnesses if e.g. re-teleporting multiple times on the reward location....? 
# # If we already have hit the reward location, but haven't # # perceived it / been transported yet, we don't care if we # # do it again before the perception (and transportation) # # has occurred # reward += params['rew'] # That is the reward that meta-learning cares about - not the one perceived by the agent, which is delayed # rewarddelay = 1 + np.random.randint(1 + params['md']) #rewardpercep = 0 #if rewarddelay > -1: # rewarddelay -= 1 #if rewarddelay == 0: # # Now we can perceive the reward (and teleport)! # # NOTE: in this implementation, the agent only perceives the positive # # rewards - not the 'pain' of hitting the walls. That's OK (not # # something you *need* to learn within-life, outer loop can # # learn it)! # rewardpercep = params['rew'] # if params['rsp'] == 1: # posr = np.random.randint(1, LABSIZE - 1) # posc = np.random.randint(1, LABSIZE - 1) # while lab[posr, posc] == 1 or (rposr == posr and rposc == posc): # posr = np.random.randint(1, LABSIZE - 1) # posc = np.random.randint(1, LABSIZE - 1) # else: # posr = CTR # posc = CTR ## Explortion reward (actually a penalty on the normalized visit count of the new location) #count[posr, posc] += 1 #reward -= (count[posr, posc] / np.sum(count)) * params['exprew'] if PRINTTRACE: #print("Step ", numstep, "- GI: ", goodinputs, ", GA: ", goodaction, " Inputs: ", inputsN, " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen, # " - inputsthisstep:", inputsthisstep, " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Rew: ", reward) print("Step ", numstep, " Inputs: ", inputs[0,:TOTALNBINPUTS], " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen, " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Reward (this step): ", reward) rewards.append(reward) vs.append(v) sumreward += reward logprobs.append(distrib.log_prob(actionchosen)) #if params['algo'] == 'A3C': loss += params['bent'] * y.pow(2).sum() # We want to penalize concentration, i.e. 
encourage diversity; our version of PyTorch does not have an entropy() function for Distribution, so we use this instead. ##if PRINTTRACE: ## print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward) # Episode is done, now let's do the actual computations gammaR = params['gr'] if True: #params['algo'] == 'A3C': R = 0 for numstepb in reversed(range(params['eplen'])) : #BATCHSIZE = 1 #R = gammaR * R + rewards[numstepb] #ctrR = R - vs[numstepb][0] #lossv += ctrR.pow(2).sum() / BATCHSIZE #loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BATCHSIZE # Need to check if detach() is OK R = gammaR * R + rewards[numstepb] lossv += (vs[numstepb][0] - R).pow(2) loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows loss += params['blossv'] * lossv #elif params['algo'] == 'REI': # R = sumreward # baseline = meanrewards[rposr, rposc] # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * (R - baseline) #elif params['algo'] == 'REINOB': # R = sumreward # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * R #elif params['algo'] == 'REITMP': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * R #elif params['algo'] == 'REITMPB': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * (R - meanrewardstmp[rposr, rposc, numstepb]) #else: # raise ValueError("Which algo?") #meanrewards[rposr, rposc] = (1.0 - params['nu']) * meanrewards[rposr, rposc] + params['nu'] * sumreward #R = 0 #for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # meanrewardstmp[rposr, rposc, numstepb] = (1.0 - params['nu']) * meanrewardstmp[rposr, rposc, numstepb] + params['nu'] * R loss /= params['eplen'] if PRINTTRACE: if True: 
#params['algo'] == 'A3C': print("lossv: ", lossv.data.cpu().numpy()[0]) print ("Total reward for this episode:", sumreward, "Dist:", dist) #if params['squash'] == 1: # if sumreward < 0: # sumreward = -np.sqrt(-sumreward) # else: # sumreward = np.sqrt(sumreward) #elif params['squash'] == 0: # pass #else: # raise ValueError("Incorrect value for squash parameter") #loss *= sumreward if numiter == 5212 : pdb.set_trace() #for p in net.parameters(): # p.grad.data.clamp_(-params['clp'], params['clp']) if numiter > 100: # Burn-in period for meanrewards loss.backward() optimizer.step() #torch.cuda.empty_cache() #print(sumreward) lossnum = loss.data[0] lossbetweensaves += lossnum all_losses_objective.append(lossnum) all_total_rewards.append(sumreward) #all_losses_v.append(lossv.data[0]) #total_loss += lossnum if (numiter+1) % params['print_every'] == 0: np.savetxt('a2.txt', all_losses_objective) print(numiter, "====") print("Mean loss: ", lossbetweensaves / params['print_every']) lossbetweensaves = 0 print("Mean reward: ", np.sum(all_total_rewards[-params['print_every']:])/ params['print_every']) previoustime = nowtime nowtime = time.time() print("Time spent on last", params['print_every'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", net.eta.data.cpu().numpy(), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'modul': print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy(), " mean-abs pw: ", np.mean(np.abs(pw.data.cpu().numpy()))) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numiter+1) % params['save_every'] == 0: print("Saving files...") # lossbetweensaves /= params['save_every'] # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) # print("Alternative computation (should be equal):", 
np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # # Instability detection; necessary for SELUs, which seem to be divergence-prone # # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us. # if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) # else: print("Saving local files...") #with open('params_'+suffix+'.dat', 'wb') as fo: # #pickle.dump(net.w.data.cpu().numpy(), fo) # #pickle.dump(net.alpha.data.cpu().numpy(), fo) # #pickle.dump(net.eta.data.cpu().numpy(), fo) # #pickle.dump(all_losses, fo) # pickle.dump(params, fo) #with open('loss_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_objective: # thefile.write("%s\n" % item) #with open('lossv_'+suffix+'.txt', 'w') as thefile: # for item in all_losses_v: # thefile.write("%s\n" % item) with open('loss_'+suffix+'.txt', 'w') as thefile: for item in all_total_rewards[::10]: thefile.write("%s\n" % item) torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat') with open('params_'+suffix+'.dat', 'wb') as fo: pickle.dump(params, fo) if os.path.isdir('/mnt/share/tmiconi'): print("Transferring to NFS storage...") for fn in ['params_'+suffix+'.dat', 'loss_'+suffix+'.txt', 'torchmodel_'+suffix+'.dat']: result = os.system( 'cp {} {}'.format(fn, '/mnt/share/tmiconi/modulmaze/'+fn)) print("Done!") # lossbetweensavesprev = lossbetweensaves # lossbetweensaves = 0 # sys.stdout.flush() # sys.stderr.flush() if __name__ == "__main__": #defaultParams = { # 'type' : 'lstm', # 'seqlen' : 200, # 'hs': 500, # 'activ': 'tanh', # 'steplr': 10e9, # By default, no change in the learning rate # 'gamma': .5, # The annealing factor of learning rate decay for Adam # 'imagesize': 31, # 'nbiter': 30000, # 'lr': 1e-4, # 'test_every': 10, # 'save_every': 
3000, # 'rngseed':0 #} parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) #parser.add_argument("--wp", type=float, help="wall penalty (reward decrement for hitting a wall)", default=0.1) parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0) parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.05) #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2) #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0) parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03) #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0) parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1) #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7) #parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=0) #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0) #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0) #parser.add_argument("--nbarms", type=int, help="number of arms", default=2) #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3) #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", 
default='tanh') #parser.add_argument("--algo", help="meta-learning algorithm (A3C or REI)", default='A3C') parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb') parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='modul') parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9) parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh') parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9) parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4) parser.add_argument("--fm", type=int, help="if using neuromodulation, do we modulate the whole network (1) or just half (0) ?", default=1) #parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1) #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0) #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4) #parser.add_argument("--modhalf", type=int, help="which half of the recurrent netowkr receives modulation (1 or 2)", default=1) #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4) parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1) parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=1) parser.add_argument("--clp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0) parser.add_argument("--md", type=int, help="maximum delay for reward reception", default=0) parser.add_argument("--eplen", type=int, help="length of episodes", default=100) #parser.add_argument("--exptime", type=int, help="exploration (no 
reward) time (must be < eplen)", default=0) parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100) parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=3e-6) #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000) #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=1000) parser.add_argument("--print_every", type=int, help="number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: omniglot/.ipynb_checkpoints/Omniglot Data Loading-checkpoint.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading Omniglot data...\n", "['/home/tmiconi/exp/omniglot-master/python/images_background/Futurama', '/home/tmiconi/exp/omniglot-master/python/images_background/Japanese_(katakana)', '/home/tmiconi/exp/omniglot-master/python/images_background/Cyrillic', '/home/tmiconi/exp/omniglot-master/python/images_background/Tagalog']\n", "['/home/tmiconi/exp/omniglot-master/python/images_evaluation/Ge_ez', '/home/tmiconi/exp/omniglot-master/python/images_evaluation/Tibetan', '/home/tmiconi/exp/omniglot-master/python/images_evaluation/Tengwar', '/home/tmiconi/exp/omniglot-master/python/images_evaluation/Atemayar_Qelisayer']\n", "1623\n", "(105, 105)\n", "Data loaded!\n" ] } ], 
"source": [ "# Loading Omniglot data\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import glob\n", "\n", "print(\"Loading Omniglot data...\")\n", "imagedata = []\n", "imagefilenames=[]\n", "for basedir in ('/home/tmiconi/exp/omniglot-master/python/images_background/', \n", " '/home/tmiconi/exp/omniglot-master/python/images_evaluation/'):\n", " alphabetdirs = glob.glob(basedir+'*')\n", " print(alphabetdirs[:4])\n", " for alphabetdir in alphabetdirs:\n", " chardirs = glob.glob(alphabetdir+\"/*\")\n", " for chardir in chardirs:\n", " chardata = []\n", " charfiles = glob.glob(chardir+'/*')\n", " for fn in charfiles:\n", " filedata = plt.imread(fn)\n", " chardata.append(filedata)\n", " imagedata.append(chardata)\n", " imagefilenames.append(fn)\n", "# imagedata is now a list of lists of numpy arrays \n", "# imagedata[CharactertNumber][FileNumber] -> numpy(105,105)\n", "print(len(imagedata))\n", "print(imagedata[1][2].shape)\n", "print(\"Data loaded!\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.5/dist-packages/skimage/transform/_warps.py:84: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15.\n", " warn(\"The default mode, 'constant', will be changed to 'reflect' in \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Displayed.\n" ] }, { "data": { "text/plain": [ "dtype('float64')" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADFtJREFUeJzt3W2sHOV5h/Hr7gmkEskHMK7rOG5M\nI4pEkHCqU7c0qEqVFxyKavIFxaoit6VxlAaptKkURFUVVVVFI5I0H1qkk2JhKkqoRBBWZZVQqyqK\nGgEH5GAMTaDEEXb8hp02phJvJ3c/nHF0Aufsrndnd9bc109a7ew8szu3x/57Xp6dfSIzkVTPz3Rd\ngKRuGH6pKMMvFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0W9bZIru/CCmdyw/pxJrlIq5cALr/HiyYUY\nZNmRwh8Rm4GvADPAP2Tmrb2W37D+HB59cP0oq5TUw6arXhh42aEP+yNiBvg74GPApcDWiLh02M+T\nNFmjnPNvAp7LzOcz81Xga8CWdsqSNG6jhH8dsPQY42Az76dExPaImI+I+eMnFkZYnaQ2jf1qf2bO\nZeZsZs6uXjUz7tVJGtAo4T8ELL169+5mnqSzwCjhfwy4OCIuiohzgU8Au9opS9K4Dd3Vl5mvR8QN\nwIMsdvXtyMz9rVUmaaxG6ufPzN3A7pZqkTRBfr1XKsrwS0UZfqkowy8VZfilogy/VJThl4oy/FJR\nhl8qyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtFGX6pKMMvFWX4paIMv1SU4ZeKMvxS\nUYZfKsrwS0UZfqkowy8VZfilokYapTciDgCngAXg9cycbaMoSeM3Uvgbv5mZL7bwOZImyMN+qahR\nw5/ANyLi8YjY3kZBkiZj1MP+KzPzUET8HPBQRPxXZj68dIHmP4XtAL+wro2zDEltGGnPn5mHmudj\nwP3ApmWWmcvM2cycXb1qZpTVSWrR0OGPiPMi4p2np4GPAk+1VZik8RrlOHwNcH9EnP6cf8rMf22l\nKkljN3T4M/N54PIWa5E0QXb1SUUZfqkowy8VZfilogy/VJThl4qaqu/bXvWujV2XIA3kwR/s7bqE\nkbnnl4oy/FJRhl8qyvBLRRl+qSjDLxVl+KWipqqf/8iNv97Zun/+b/+zZ3uXtVW17s79PdsX/ud/\ne7Yf+eNx/p3Zzy/pLGX4paIMv1SU4ZeKMvxSUYZfKsrwS0VFZk5sZbOX/2w++uD6ia3vTPT7LYFp\nvn/78i/84Ypt/b6/0OWfa9y/3zDNf2fjsumqF5j/9ssxyLLu+aWiDL9UlOGXijL8UlGGXyrK8EtF\nGX6pqL7380fEDuAa4FhmXtbMuwC4F9gAHACuy8wfjq9M9dKvL79Lo/Tl7zr0WM/23173K0N/tgbb\n898JbH7DvJuAPZl5MbCneS3pLNI3/Jn5MHDyDbO3ADub6Z3AtS3XJWnMhj3nX5OZh5vpI8CaluqR\nNCEjX/DLxZsDVrxBICK2R8R8RMwfP7Ew6uoktWTY8B+NiLUAzfOxlRbMzLnMnM3M2dWrZoZcnaS2\nDRv+XcC2Znob8EA75UialL7hj4h7gG8Bl0TEwYi4HrgV+EhEPAt8uHkt6SzSt58/M7eu0PShlmsp\na+8rr/Rs//xFv9qz/Qd/uvLv07/rtvF+B2CUfvx+99tf/eHf6fMJ3x163fIbflJZhl8qyvBLRRl+\nqSjDLxVl+KWipmqI7requ0+t6tl+1yW9f878xB9c0bN935/8/YptV93Wuyvuon/5VM/2X9re+7ba\nfkb5+eyFp3t35b18zaY+n1Dvp7vPhHt+qSjDLxVl+KWiDL9UlOGXijL8UlGGXyrKfv4WjDrU9Cu/\n1fsnqOf/8vaRPr+Xfv34n3n2uZ7t1573UpvlnJEXf+//Olv3W
4F7fqkowy8VZfilogy/VJThl4oy\n/FJRhl8qyn7+AW1+T697x1/t+d6/+d4jPds3vn167zvvsh9f4+WeXyrK8EtFGX6pKMMvFWX4paIM\nv1SU4ZeK6tvPHxE7gGuAY5l5WTPvFuBTwPFmsZszc/e4ipwG+drKffn9f5v+7e0WcwYO3ve+nu37\nr7h7QpVo2gyy578T2LzM/C9n5sbm8ZYOvvRW1Df8mfkwcHICtUiaoFHO+W+IiCcjYkdEnN9aRZIm\nYtjw3w68F9gIHAa+uNKCEbE9IuYjYv74iYUhVyepbUOFPzOPZuZCZv4Y+Cqw4l0vmTmXmbOZObt6\n1cywdUpq2VDhj4i1S15+HHiqnXIkTcogXX33AB8ELoyIg8BfAB+MiI1AAgeAT4+xRklj0Df8mbl1\nmdl3jKGWqfbX33u0R+u5E6vjTNmPr5X4DT+pKMMvFWX4paIMv1SU4ZeKMvxSUf50d6P/bbnT251X\nld2Yo3HPLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtF2c+vs9Yld3ymZ/uGP//Wim0z77uk53t3P3Tv\nUDWdTdzzS0UZfqkowy8VZfilogy/VJThl4oy/FJR9vPrrNWrHx/g5WtWHEiK/5iba7ucs457fqko\nwy8VZfilogy/VJThl4oy/FJRhl8qqm8/f0SsB+4C1gAJzGXmVyLiAuBeYANwALguM384vlJVzYG/\nuqJn+3d+//Y+n9BvLIbaBtnzvw58LjMvBX4N+GxEXArcBOzJzIuBPc1rSWeJvuHPzMOZ+UQzfQp4\nBlgHbAF2NovtBK4dV5GS2ndG5/wRsQF4P/AIsCYzDzdNR1g8LZB0lhg4/BHxDuA+4MbM/NHStsxM\nFq8HLPe+7RExHxHzx08sjFSspPYMFP6IOIfF4N+dmV9vZh+NiLVN+1rg2HLvzcy5zJzNzNnVq2ba\nqFlSC/qGPyICuAN4JjO/tKRpF7Ctmd4GPNB+eZLGZZBbej8AfBLYFxGn+05uBm4F/jkirge+D1w3\nnhJVVf+uPI2ib/gz85tArND8oXbLkTQpfsNPKsrwS0UZfqkowy8VZfilogy/VJThl4oy/FJRhl8q\nyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtFGX6pKMMvFWX4paIMv1SU4ZeKMvxSUYZf\nKsrwS0UZfqkowy8VZfilogy/VFTf8EfE+oj494h4OiL2R8QfNfNviYhDEbG3eVw9/nIlteVtAyzz\nOvC5zHwiIt4JPB4RDzVtX87M28ZXnqRx6Rv+zDwMHG6mT0XEM8C6cRcmabzO6Jw/IjYA7wceaWbd\nEBFPRsSOiDh/hfdsj4j5iJg/fmJhpGIltWfg8EfEO4D7gBsz80fA7cB7gY0sHhl8cbn3ZeZcZs5m\n5uzqVTMtlCypDQOFPyLOYTH4d2fm1wEy82hmLmTmj4GvApvGV6aktg1ytT+AO4BnMvNLS+avXbLY\nx4Gn2i9P0rgMcrX/A8AngX0RsbeZdzOwNSI2AgkcAD49lgoljcUgV/u/CcQyTbvbL0fSpPgNP6ko\nwy8VZfilogy/VJThl4oy/FJRhl8qyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGRmZNbWcRx4PtL\nZl0IvDixAs7MtNY2rXWBtQ2rzdrek5mrB1lwouF/08oj5jNztrMCepjW2qa1LrC2YXVVm4f9UlGG\nXyqq6/DPdbz+Xqa1tmmtC6xtWJ3U1uk5v6TudL3nl9SRTsIfEZsj4jsR8VxE3NRFDSuJiAMRsa8Z\neXi+41p2RMSxiHhqybwLIuKhiHi2eV52mLSOapuKkZt7jCzd6babthGvJ37YHxEzwHeBjwAHgceA\nrZn59EQLWUFEHABmM7PzPuGI+A3gJeCuzLysmfcF4GRm3tr8x3l+Zn5+Smq7BXip65GbmwFl1i4d\nWRq4FvhdOtx2Peq6jg62W
xd7/k3Ac5n5fGa+CnwN2NJBHVMvMx8GTr5h9hZgZzO9k8V/PBO3Qm1T\nITMPZ+YTzfQp4PTI0p1uux51daKL8K8DXljy+iDTNeR3At+IiMcjYnvXxSxjTTNsOsARYE2XxSyj\n78jNk/SGkaWnZtsNM+J127zg92ZXZuYvAx8DPtsc3k6lXDxnm6bumoFGbp6UZUaW/okut92wI163\nrYvwHwLWL3n97mbeVMjMQ83zMeB+pm/04aOnB0ltno91XM9PTNPIzcuNLM0UbLtpGvG6i/A/Blwc\nERdFxLnAJ4BdHdTxJhFxXnMhhog4D/go0zf68C5gWzO9DXigw1p+yrSM3LzSyNJ0vO2mbsTrzJz4\nA7iaxSv+/w38WRc1rFDXLwLfbh77u64NuIfFw8DXWLw2cj2wCtgDPAv8G3DBFNX2j8A+4EkWg7a2\no9quZPGQ/klgb/O4uutt16OuTrab3/CTivKCn1SU4ZeKMvxSUYZfKsrwS0UZfqkowy8VZfilov4f\nNMLe08/YJvoAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAC7ZJREFUeJzt3V2IXPUZx/Hfr2tUiBaSNY1pjI0V\nKYhgLNO0VikWq4mhkEjBmgtJQYz4AlqkVWxp7V1qq7YXrRA1mIrVFqyYi9CYBkFEsa4SNdG2URsx\nMWZjcqHSFs369GJPZNWdM+PMOXMmeb4fWHb2nHl5GPLNvJzZ/TsiBCCfzzU9AIBmED+QFPEDSRE/\nkBTxA0kRP5AU8QNJET+QFPEDSR01yBs7YfZILFwwY5A3CaSy840P9PaBCXdz3r7it71U0m8ljUi6\nOyLWlJ1/4YIZ+vumBf3cJIASi5e80fV5e37ab3tE0u8kXSTpdEkrbZ/e6/UBGKx+XvMvlvRKRLwW\nEe9LelDS8mrGAlC3fuKfL2nqc4xdxbaPsb3a9pjtsX37J/q4OQBVqv3d/ohYGxGtiGjNGR2p++YA\ndKmf+HdLmvru3UnFNgCHgX7if0bSabZPsX20pEslbahmLAB16/lQX0QctH2tpE2aPNS3LiK2VzYZ\ngFr1dZw/IjZK2ljRLAAGiI/3AkkRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/\nkBTxA0kRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+Q\nFPEDSfW1Sq/tnZLelTQh6WBEtKoYCkD9+oq/8O2IeLuC6wEwQDztB5LqN/6Q9KjtZ22vrmIgAIPR\n79P+cyNit+0vSNps+x8R8fjUMxT/KayWpJPnV/EqA0AV+nrkj4jdxfdxSQ9LWjzNedZGRCsiWnNG\nR/q5OQAV6jl+2zNtH3/otKQLJW2rajAA9ernefhcSQ/bPnQ9f4yIv1YyFYDa9Rx/RLwm6cwKZwEw\nQBzqA5IifiAp4geSIn4gKeIHkiJ+ICniB5IifiAp4geSIn4gKeIHkiJ+ICniB5IifiAp/q7WEeDM\nW69uu+/E3zxZetlNb26tehwcJnjkB5IifiAp4geSIn4gKeIHkiJ+ICniB5IifiAp4geSIn4gKeIH\nkiJ+ICniB5IifiAp4geS6hi/7XW2x21vm7Jttu3NtncU32fVOyZKueQLaKObR/57JS39xLabJG2J\niNMkbSl+BnAY6Rh/R
Dwu6cAnNi+XtL44vV7SiornAlCzXl/zz42IPcXptyTNrWgeAAPS9xt+ERGS\not1+26ttj9ke27d/ot+bA1CRXuPfa3ueJBXfx9udMSLWRkQrIlpzRkd6vDkAVes1/g2SVhWnV0l6\npJpxAAxKN4f6HpD0lKSv2N5l+3JJayRdYHuHpO8UPwM4jHT8u/0RsbLNrvMrngU9ev5Hv2+7b8kd\niwY4CQ4nfMIPSIr4gaSIH0iK+IGkiB9IiviBpFiiG0Pr7Oe/V7r/nf8cW7p/+9n3VznOEYdHfiAp\n4geSIn4gKeIHkiJ+ICniB5IifiApjvOjL62fXVW6f/Tup3q+7s/r1Q77yy1R+19n3vTm1h4mOrLw\nyA8kRfxAUsQPJEX8QFLEDyRF/EBSxA8kxXF+lFp6cqt0/+jB8uP4+684u+2+n/74vtLLrpj5Xun+\nTpZ8sf1x/mUXfL/0shs3/6mv2z4c8MgPJEX8QFLEDyRF/EBSxA8kRfxAUsQPJNXxOL/tdZK+K2k8\nIs4ott0i6QpJ+4qz3RwRG+saEvUpOxY+6WBf1z/2izv7ujzq080j/72Slk6z/Y6IWFR8ET5wmOkY\nf0Q8LunAAGYBMED9vOa/1vYLttfZnlXZRAAGotf475R0qqRFkvZIuq3dGW2vtj1me2zf/okebw5A\n1XqKPyL2RsRERHwo6S5Ji0vOuzYiWhHRmjM60uucACrWU/y250358WJJ26oZB8CgdHOo7wFJ50k6\nwfYuST+XdJ7tRZJC0k5JV9Y4I4AadIw/IlZOs/meGmZBj8781dVt952oJ/u67k5/377z5wTqc+Wu\n9n8rYNJ/2+7J8Pv6nfAJPyAp4geSIn4gKeIHkiJ+ICniB5LiT3cfAU68o/fDeXUvVV12KPDUZ44t\nveyrX/tfh2tvfyhPkt764TdL9rJEN4/8QFLEDyRF/EBSxA8kRfxAUsQPJEX8QFIc5z/ClR/rlvo9\n3v3Lfz9duv/GU77edl/n4/jlrtrxSun+FTM5ll+GR34gKeIHkiJ+ICniB5IifiAp4geSIn4gKY7z\nHwHKfye/3mPdi445pnR/3X8vAL3jkR9IiviBpIgfSIr4gaSIH0iK+IGkiB9IqmP8thfYfsz2S7a3\n276u2D7b9mbbO4rvs+ofF0BVunnkPyjphog4XdI3JF1j+3RJN0naEhGnSdpS/AzgMNEx/ojYExHP\nFafflfSypPmSlktaX5xtvaQVdQ0JoHqf6TW/7YWSzpL0tKS5EbGn2PWWpLmVTgagVl3Hb/s4SQ9J\nuj4i3pm6LyJCUrS53GrbY7bH9u2f6GtYANXpKn7bMzQZ/v0R8Zdi817b84r98ySNT3fZiFgbEa2I\naM0ZHaliZgAV6Obdfku6R9LLEXH7lF0bJK0qTq+S9Ej14wGoSze/0nuOpMskvWj70O9n3ixpjaQ/\n275c0uuSLqlnRAB16Bh/RDwhyW12n1/tOAAGhU/4AUkRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU\n8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/kBTx\nA0kRP5AU8QNJET+QFPEDSRE/kFTH+G0vsP2Y7Zdsb7d9XbH9Ftu7bW8tvpbVPy6AqhzVxXkOSroh\nIp6zfbykZ21vLvbdERG/rm88AHXpGH9E7JG0pzj9ru2XJc2vezAA9fpMr/ltL5R0lqSni03X2n7B\n9jrbs9pcZrXtMdtj+/ZP9DUsgOp0Hb/t4yQ9JOn6iHhH0p2STpW0SJPPDG6b7nIRsTYiWhHRmjM6\nUsHIAKrQVfy2Z2gy/Psj4i+SFBF7I2IiIj6UdJekxfWNCaBq3bzbb0n3SHo5Im6fsn3elLNdLGlb\n9eMBqEs37/afI+kySS/a3lpsu1nSStuLJIWknZKurGVCALXo5t3+JyR5ml0bqx8HwKD
wCT8gKeIH\nkiJ+ICniB5IifiAp4geSIn4gKeIHkiJ+ICniB5IifiAp4geSIn4gKeIHknJEDO7G7H2SXp+y6QRJ\nbw9sgM9mWGcb1rkkZutVlbN9KSLmdHPGgcb/qRu3xyKi1dgAJYZ1tmGdS2K2XjU1G0/7gaSIH0iq\n6fjXNnz7ZYZ1tmGdS2K2XjUyW6Ov+QE0p+lHfgANaSR+20tt/9P2K7ZvamKGdmzvtP1isfLwWMOz\nrLM9bnvblG2zbW+2vaP4Pu0yaQ3NNhQrN5esLN3ofTdsK14P/Gm/7RFJ/5J0gaRdkp6RtDIiXhro\nIG3Y3impFRGNHxO2/S1J70n6Q0ScUWy7VdKBiFhT/Mc5KyJuHJLZbpH0XtMrNxcLysyburK0pBWS\nfqAG77uSuS5RA/dbE4/8iyW9EhGvRcT7kh6UtLyBOYZeRDwu6cAnNi+XtL44vV6T/3gGrs1sQyEi\n9kTEc8XpdyUdWlm60fuuZK5GNBH/fElvTPl5l4Zrye+Q9KjtZ22vbnqYacwtlk2XpLckzW1ymGl0\nXLl5kD6xsvTQ3He9rHhdNd7w+7RzI+Krki6SdE3x9HYoxeRrtmE6XNPVys2DMs3K0h9p8r7rdcXr\nqjUR/25JC6b8fFKxbShExO7i+7ikhzV8qw/vPbRIavF9vOF5PjJMKzdPt7K0huC+G6YVr5uI/xlJ\np9k+xfbRki6VtKGBOT7F9szijRjZninpQg3f6sMbJK0qTq+S9EiDs3zMsKzc3G5laTV83w3ditcR\nMfAvScs0+Y7/q5J+0sQMbeb6sqTni6/tTc8m6QFNPg38QJPvjVwuaVTSFkk7JP1N0uwhmu0+SS9K\nekGToc1raLZzNfmU/gVJW4uvZU3fdyVzNXK/8Qk/ICne8AOSIn4gKeIHkiJ+ICniB5IifiAp4geS\nIn4gqf8DKi6h9fAP6nwAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADJpJREFUeJzt3X+sX3V9x/Hne9fiEvAPWrq2lro6\n1i1pSFq3m2ZTsrE4LaJJMVuITWa6rFmNSjaMwRE2N+L2B8Ep+sckqdJYF4eYIKORZpU1WwiBCBdC\n+SFzZaQOan/Rmgj7B6jv/XFPzZXee76X76/zbd/PR/LNPd/zOed73vnCq+d8z+ec84nMRFI9v9R1\nAZK6Yfilogy/VJThl4oy/FJRhl8qyvBLRRl+qSjDLxX1lnFu7JKlU7l2zZJxblIq5dALr/HSqdOx\nmGUHCn9EXAV8GZgCvpaZt7Qtv3bNEh7Zt2aQTUpqsWnzC4tetu/D/oiYAv4J+ACwHtgaEev7/TxJ\n4zXIb/5NwHOZ+Xxmvgp8C9gynLIkjdog4V8NzD3GeLGZ9wsiYkdEzETEzImTpwfYnKRhGvnZ/szc\nmZnTmTm9fNnUqDcnaZEGCf9hYO7Zu0ubeZLOAYOE/1FgXUS8MyIuAD4C7BlOWZJGre+uvsx8PSKu\nA/Yx29W3KzOfGVplkkZqoH7+zNwL7B1SLZLGyMt7paIMv1SU4ZeKMvxSUYZfKsrwS0UZfqkowy8V\nZfilogy/VJThl4oy/FJRhl8qaqyP7tb4/f6OHa3tv/zdR8ZUydmOfurdre0HbvjKmCqpyT2/VJTh\nl4oy/FJRhl8qyvBLRRl+qSjDLxVlP/95YMOtn1iwbeV3H2pd9+j17X3to7TytvbaNt+2caDP3/fj\nJwZa/3znnl8qyvBLRRl+qSjDLxVl+KWiDL9UlOGXihqonz8iDgEvA6eB1zNzehhFaXwOfKbDe+Y/\nM9jqm98+2HUA1Q3jIp8/yMyXhvA5ksbIw36pqEHDn8D3IuKxiGh/XpSkiTLoYf8VmXk4In4FuD8i\n/iszH5i7QPOPwg6Ad6z2VgJpUgy058/Mw83f48A9wKZ5ltmZmdOZOb182dQgm5M0RH2HPyIujIi3\nnZkG3g88PazCJI3WIMfhK4B7IuLM5/xLZv7bUKqSNHJ9hz8znwc2DLEW9amtr37zl87fvnDv1x+M\nXX1SUYZfKsrwS0UZfqkowy8VZfilorze9jzQ+uhu2h+P3aVBb8m1q28w7vmlogy/VJThl4oy/FJR\nhl8qyvBLRRl+qSj7+c8Dg9zS+8FNH2xtv++R+/qqaTEGHx7cfv5BuOeXijL8UlGGXyrK8EtFGX6p\nKMMvFWX4paLs5z/P9brnffPb29fvdc/9IPfUj3p48H/9v4sWbLvmwldGuu1zgXt+qSjDLxVl+KWi\nDL9UlOGXijL8UlGGXyqqZz9/ROwCPgQcz8zLm3lLgbuAtcAh4NrM/MnoytSofPzgc63tt6/79db2\nLp8H0Etb7df4zP9F7fm/Dlz1hnk3Avszcx2wv3kv6RzSM/yZ+QBw6g2ztwC7m+ndwDVDrkvSiPX7\nm39FZh5ppo8CK4ZUj6QxGfiEX2YmkAu1R8SOiJiJiJkTJ08PujlJQ9Jv+I9FxCqA5u/xhRbMzJ2Z\nOZ2Z08uXTfW5OUnD1m/49wDbmultwL3DKUfSuPQMf0TcCTwM/GZEvBgR24FbgPdFxEHgD5v3ks4h\nPfv5M3PrAk3vHXIt6kCv+9p79Yf3eh7AKG249ROt7St5aEyVnJu8wk8qyvBLRRl+qSjDLxVl+KWi\nDL9UlI/u1kht+PzC3XEHbhjs0d0XH3xtoPWrc88vFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0XZz6+B\nHPr7321tX/vZlttqbxhs22+979HW9kP/0Fabj+52zy8VZfilo
gy/VJThl4oy/FJRhl8qyvBLRdnP\nr4H8cPvtre2bP7txTJWcbe3fPLxw45+Nr45J5Z5fKsrwS0UZfqkowy8VZfilogy/VJThl4rq2c8f\nEbuADwHHM/PyZt7NwJ8DJ5rFbsrMvaMqUpOry2Gy9/UcPry7awzOBYvZ838duGqe+bdl5sbmZfCl\nc0zP8GfmA8CpMdQiaYwG+c1/XUQ8GRG7IuLioVUkaSz6Df/twGXARuAI8IWFFoyIHRExExEzJ06e\n7nNzkoatr/Bn5rHMPJ2ZPwO+CmxqWXZnZk5n5vTyZVP91ilpyPoKf0SsmvP2w8DTwylH0rgspqvv\nTuBK4JKIeBH4O+DKiNgIJHAI+NgIa5Q0Aj3Dn5lb55l9xwhqkTRGXuEnFWX4paIMv1SU4ZeKMvxS\nUYZfKspHd2sgK7/Ufsvu0U+9u6W1u2Gyr77yj1rb9/7n3WOqpDvu+aWiDL9UlOGXijL8UlGGXyrK\n8EtFGX6pKPv5NVIHbvhKZ9u+438fXLBt+zvGWMiEcs8vFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0XZ\nz19czyG2e9yvP8kufctFfa87/bcfb22f+dztfX/2pHDPLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtF\n9eznj4g1wDeAFUACOzPzyxGxFLgLWAscAq7NzJ+MrtTz1/qH/qS1fc0fPz2yba9ksH78fT/u7tn7\no7Tsaw+3L/C58dQxSovZ878OfDoz1wO/A3wyItYDNwL7M3MdsL95L+kc0TP8mXkkMx9vpl8GngVW\nA1uA3c1iu4FrRlWkpOF7U7/5I2It8C7g+8CKzDzSNB1l9meBpHPEosMfERcBdwPXZ+ZP57ZlZjJ7\nPmC+9XZExExEzJw4eXqgYiUNz6LCHxFLmA3+NzPzO83sYxGxqmlfBRyfb93M3JmZ05k5vXzZ1DBq\nljQEPcMfEQHcATybmV+c07QH2NZMbwPuHX55kkZlMbf0vgf4KPBURJzp17kJuAX4dkRsB34EXDua\nEiffhs/3uC32tvbutDW0d+VN/cZl7QUsWfg/497772pft6jztYvyzegZ/sx8EIgFmt873HIkjYtX\n+ElFGX6pKMMvFWX4paIMv1SU4ZeK8tHdQ9BrGOrH/uLV1vbffusFPbZgn7SGzz2/VJThl4oy/FJR\nhl8qyvBLRRl+qSjDLxVlP/8Y9O7Hl8bPPb9UlOGXijL8UlGGXyrK8EtFGX6pKMMvFWX4paIMv1SU\n4ZeKMvxSUYZfKsrwS0UZfqkowy8V1TP8EbEmIv4jIn4QEc9ExF8282+OiMMR8UTzunr05UoalsU8\nzON14NOZ+XhEvA14LCLub9puy8x/HF15kkalZ/gz8whwpJl+OSKeBVaPujBJo/WmfvNHxFrgXcD3\nm1nXRcSTEbErIi5eYJ0dETETETMnTp4eqFhJw7Po8EfERcDdwPWZ+VPgduAyYCOzRwZfmG+9zNyZ\nmdOZOb182dQQSpY0DIsKf0QsYTb438zM7wBk5rHMPJ2ZPwO+CmwaXZmShm0xZ/sDuAN4NjO/OGf+\nqjmLfRh4evjlSRqVxZztfw/wUeCpiDgzVvRNwNaI2AgkcAj42EgqlDQSiznb/yAQ8zTtHX45ksbF\nK/ykogy/VJThl4oy/FJRhl8qyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtFRWaOb2MR\nJ4AfzZl1CfDS2Ap4cya1tkmtC6ytX8Os7Vczc/liFhxr+M/aeMRMZk53VkCLSa1tUusCa+tXV7V5\n2C8VZfiloroO/86Ot99mUmub1LrA2vrVSW2d/uaX1J2u9/ySOtJJ+CPiqoj4YUQ8FxE3dlHDQiLi\nUEQ81Yw8PNNxLbsi4nhEPD1n3tKIuD8iDjZ/5x0mraPaJmLk5paRpTv97iZtxOuxH/ZHxBTw38D7\ngBeBR4GtmfmDsRaygIg4B
ExnZud9whHxe8ArwDcy8/Jm3q3Aqcy8pfmH8+LM/KsJqe1m4JWuR25u\nBpRZNXdkaeAa4E/p8LtrqetaOvjeutjzbwKey8znM/NV4FvAlg7qmHiZ+QBw6g2ztwC7m+ndzP7P\nM3YL1DYRMvNIZj7eTL8MnBlZutPvrqWuTnQR/tXAC3Pev8hkDfmdwPci4rGI2NF1MfNY0QybDnAU\nWNFlMfPoOXLzOL1hZOmJ+e76GfF62Dzhd7YrMvO3gA8An2wObydSzv5mm6TumkWN3Dwu84ws/XNd\nfnf9jng9bF2E/zCwZs77S5t5EyEzDzd/jwP3MHmjDx87M0hq8/d4x/X83CSN3DzfyNJMwHc3SSNe\ndxH+R4F1EfHOiLgA+Aiwp4M6zhIRFzYnYoiIC4H3M3mjD+8BtjXT24B7O6zlF0zKyM0LjSxNx9/d\nxI14nZljfwFXM3vG/3+Av+6ihgXq+jXgQPN6puvagDuZPQx8jdlzI9uBZcB+4CDw78DSCartn4Gn\ngCeZDdqqjmq7gtlD+ieBJ5rX1V1/dy11dfK9eYWfVJQn/KSiDL9UlOGXijL8UlGGXyrK8EtFGX6p\nKMMvFfX/gxbi67QgFM4AAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAC8ZJREFUeJzt3V2MXHd5gPHn7RK4CAjZjnGNcXFA\nKZKVqqZa3KqJKhBfIYrkcJNiJOpKUY2ASKRCgihckKsqqiBpLwrSurEwFQ1FIlF8YRGCRZtSUJpN\nZJwvSEJqFDuO7diRSIQQZHl7scdonezOjGfOzBnv+/yk1c6eM7vzZpTHZ2bO7P4jM5FUzx90PYCk\nbhi/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0W9bpI3dsnamdyy+aJJ3qRUypFnf8sLZxZikOuO\nFH9EXAX8MzAD/Gtm3trr+ls2X8T/3rt5lJuU1MP2Dz878HWHftgfETPAvwAfAbYCOyNi67A/T9Jk\njfKcfzvwdGY+k5m/Ab4F7GhnLEnjNkr8m4CljzGONtvOERG7I2I+IuZPnV4Y4eYktWnsr/Zn5lxm\nzmbm7Pp1M+O+OUkDGiX+Y8DSV+/e1myTdAEYJf4Hgcsi4tKIeD3wMWB/O2NJGrehT/Vl5isRcQNw\nL4un+vZm5mOtTSZprEY6z5+ZB4ADLc0iaYJ8e69UlPFLRRm/VJTxS0UZv1SU8UtFGb9UlPFLRRm/\nVJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU8UtFGb9U\nlPFLRRm/VJTxS0UZv1TUSKv0RsQR4CVgAXglM2fbGErS+I0Uf+N9mflCCz9H0gT5sF8qatT4E/he\nRDwUEbvbGEjSZIz6sP/KzDwWEW8B7ouIn2bm/Uuv0PyjsBvgjza18SxDUhtGOvJn5rHm80ngbmD7\nMteZy8zZzJxdv25mlJuT1KKh44+IiyPiTWcvAx8CHm1rMEnjNcrj8A3A3RFx9uf8e2Z+t5WpJI3d\n0PFn5jPAn7Y4i6QJ8lSfVJTxS0UZv1SU8UtFGb9UlPFLRfl+21XgXf/9Nyvu2/LXhyc4yXS597lD\nY/vZH37rtp77n7/xL3vu/8nnv9rmOEPxyC8VZfxSUcYvFWX8UlHGLxVl/FJRxi8V5Xn+VaDXufwL\n4XzzsPqda
/+T2z694r63fvlHbY9zjj/8pz4///NjvfmBeOSXijJ+qSjjl4oyfqko45eKMn6pKOOX\nivI8/yp3IZ/HH1Wvc/m/vuY1i0ud47/m5nru7/v7/H/f+/0VML6/NTAoj/xSUcYvFWX8UlHGLxVl\n/FJRxi8VZfxSUX3P80fEXuAa4GRmXt5sWwv8B7AFOAJcl5kvjm9MVXT1B67rc40ne+7t/Xf7uz/P\n3rVBjvxfB6561babgIOZeRlwsPla0gWkb/yZeT9w5lWbdwD7msv7gGtbnkvSmA37nH9DZh5vLj8P\nbGhpHkkTMvILfpmZQK60PyJ2R8R8RMyfOr0w6s1Jasmw8Z+IiI0AzeeTK10xM+cyczYzZ9evmxny\n5iS1bdj49wO7msu7gHvaGUfSpPSNPyLuBH4MvCsijkbE9cCtwAcj4ingA83Xki4gfc/zZ+bOFXa9\nv+VZpHMc+P63e+7v9zv1oxj1Z1/x8YdbmmR8fIefVJTxS0UZv1SU8UtFGb9UlPFLRfmnu1e5cZ4O\nm3a9/ttf/u47en7v/zx3V9vjTB2P/FJRxi8VZfxSUcYvFWX8UlHGLxVl/FJRnudf5Wa2/nHP/b+6\n9M099//nnj1tjtOqfu9h8E939+aRXyrK+KWijF8qyvilooxfKsr4paKMXyrK8/yrXL8/f626PPJL\nRRm/VJTxS0UZv1SU8UtFGb9UlPFLRfU9zx8Re4FrgJOZeXmz7Rbg74BTzdVuzswD4xpytZv90qd6\n7l+358cTmkSVDHLk/zpw1TLbb8/Mbc2H4UsXmL7xZ+b9wJkJzCJpgkZ5zn9DRByOiL0Rsaa1iSRN\nxLDxfw14J7ANOA58ZaUrRsTuiJiPiPlTpxeGvDlJbRsq/sw8kZkLmfk7YA+wvcd15zJzNjNn16+b\nGXZOSS0bKv6I2Ljky48Cj7YzjqRJGeRU353Ae4FLIuIo8CXgvRGxDUjgCPDJMc4oaQz6xp+ZO5fZ\nfMcYZlm1+v19+bes+WnP/f/wfw/03L/tDW8475kk3+EnFWX8UlHGLxVl/FJRxi8VZfxSUf7p7imw\n8OKLPfd/4dI/n9AkqsQjv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU5/k1kifn3tNz/+3vu3NCk+h8\neeSXijJ+qSjjl4oyfqko45eKMn6pKOOXivI8/wTc+9yhrkcYo9X837a6eeSXijJ+qSjjl4oyfqko\n45eKMn6pKOOXiuobf0RsjogfRMTjEfFYRHy22b42Iu6LiKeaz2vGP66ktgxy5H8F+FxmbgX+AvhM\nRGwFbgIOZuZlwMHma0kXiL7xZ+bxzHy4ufwS8ASwCdgB7Guutg+4dlxDSmrfeT3nj4gtwLuBB4AN\nmXm82fU8sKHVySSN1cDxR8Qbge8AN2bmL5fuy8wEcoXv2x0R8xExf+r0wkjDSmrPQPFHxEUshv/N\nzLyr2XwiIjY2+zcCJ5f73sycy8zZzJxdv26mjZkltWCQV/sDuAN4IjNvW7JrP7CrubwLuKf98SSN\nyyC/0nsF8AngkYg4+/ubNwO3At+OiOuBXwDXjWdESePQN/7M/CEQK+x+f7vjSJoU3+EnFWX8UlHG\nLxVl/FJRxi8VZfxSUcYvFWX8UlHGLxVl/FJRxi8VZfxSUcYvFWX8UlHGLxVl/FJRxi8VZfxSUcYv\nFWX8UlHGLxVl/FJRxi8VZfxSUcYvFWX8UlHGLxVl/FJRxi8VZfxSUX3jj4jNEfGDiHg8Ih6LiM82\n22+JiGMRcaj5uHr840pqy+sGuM4rwOcy8+GIeBPwUETc1+y7PTO/PL7xJI1L3/gz8zhwvLn8UkQ8\nAWwa92CSxuu8nvNHxBbg3cADzaYbIuJwROyNiDUrfM/uiJiPiPlTpxdGGlZSewaOPyLeCHwHuDEz\nfwl8DXgnsI3FRwZfWe77MnMuM2czc3b9upkWRpbUhoHij4iLWAz/m5l5F0B
mnsjMhcz8HbAH2D6+\nMSW1bZBX+wO4A3giM29bsn3jkqt9FHi0/fEkjcsgr/ZfAXwCeCQiDjXbbgZ2RsQ2IIEjwCfHMqGk\nsRjk1f4fArHMrgPtjyNpUnyHn1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTx\nS0UZv1SU8UtFRWZO7sYiTgG/WLLpEuCFiQ1wfqZ1tmmdC5xtWG3O9vbMXD/IFSca/2tuPGI+M2c7\nG6CHaZ1tWucCZxtWV7P5sF8qyvilorqOf67j2+9lWmeb1rnA2YbVyWydPueX1J2uj/ySOtJJ/BFx\nVUT8LCKejoibuphhJRFxJCIeaVYenu94lr0RcTIiHl2ybW1E3BcRTzWfl10mraPZpmLl5h4rS3d6\n303bitcTf9gfETPAk8AHgaPAg8DOzHx8ooOsICKOALOZ2fk54Yj4K+Bl4BuZeXmz7R+BM5l5a/MP\n55rM/MKUzHYL8HLXKzc3C8psXLqyNHAt8Ld0eN/1mOs6OrjfujjybweezsxnMvM3wLeAHR3MMfUy\n837gzKs27wD2NZf3sfg/z8StMNtUyMzjmflwc/kl4OzK0p3edz3m6kQX8W8Cnl3y9VGma8nvBL4X\nEQ9FxO6uh1nGhmbZdIDngQ1dDrOMvis3T9KrVpaemvtumBWv2+YLfq91ZWb+GfAR4DPNw9uplIvP\n2abpdM1AKzdPyjIrS/9el/fdsCtet62L+I8Bm5d8/bZm21TIzGPN55PA3Uzf6sMnzi6S2nw+2fE8\nvzdNKzcvt7I0U3DfTdOK113E/yBwWURcGhGvBz4G7O9gjteIiIubF2KIiIuBDzF9qw/vB3Y1l3cB\n93Q4yzmmZeXmlVaWpuP7bupWvM7MiX8AV7P4iv/PgS92McMKc70D+Enz8VjXswF3svgw8LcsvjZy\nPbAOOAg8BXwfWDtFs/0b8AhwmMXQNnY025UsPqQ/DBxqPq7u+r7rMVcn95vv8JOK8gU/qSjjl4oy\nfqko45eKMn6pKOOXijJ+qSjjl4r6f2vcpxkDpnnOAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAC8BJREFUeJzt3V2oHPUdxvHn6WmkEL1IYhpijMba\nIMSWxnIMtoZisWpMC4kUxFAkhdBIVagitGIvKvQmtFXxogaSGozFt4KKoaSNaWgJUlGPEvNiahMl\n1sS8mRRMLqzm+OvFmcgxOfvi7uzOnPy+H1h2dv6zOz+G85x5+c/u3xEhAPl8oeoCAFSD8ANJEX4g\nKcIPJEX4gaQIP5AU4QeSIvxAUoQfSOqL/VzZuZMHYtbMCf1cJZDKnnc/1vtHh93Osl2F3/YCSQ9K\nGpD0h4hY0Wz5WTMn6OUNM7tZJYAm5l33btvLdnzYb3tA0u8lXS9pjqQltud0+nkA+qubc/55knZH\nxNsR8ZGkJyUtKqcsAL3WTfhnSBp9jLG3mPcZtpfbHrI9dPjIcBerA1Cmnl/tj4hVETEYEYNTpwz0\nenUA2tRN+PdJGn317vxiHoBxoJvwvyJptu2LbJ8l6SZJ68opC0CvddzVFxEnbN8uaYNGuvrWRMSO\n0ioD0FNd9fNHxHpJ60uqBUAfcXsvkBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8\nQFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0n1dYjuM9Wt+65o2v7W5R929fkb\n3tvS1fuBsbDnB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkuurnt71H0jFJw5JORMRgGUWNN6368ff8\n+ltN299ctrLMcoC2lHGTz3cj4v0SPgdAH3HYDyTVbfhD0vO2X7W9vIyCAPRHt4f98yNin+0vS9po\n+18RsXn0AsU/heWSdMEMvkoA1EVXe/6I2Fc8H5L0rKR5YyyzKiIGI2Jw6pSBblYHoEQdh9/2RNvn\nnJyWdK2k7WUVBqC3ujkOnybpWdsnP+fxiPhrKVUB6LmOwx8Rb0v6Rom11NqCC087oxnlo6bvpR8f\ndURXH5AU4QeSIvxAUoQfSIrwA0kRfiAp7rdtU3zcuDuPn9bGeMSeH0iK8ANJEX4gKcIPJEX4gaQI\nP5AU4QeSop8flbnuvLlN23+6a3fT9sUTj5dZTjrs+YGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIP\nJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiCpluG3vcb2IdvbR82bbHuj7V3F86TelgmgbO3s+R+R\ntOCUeXdL2hQRsyVtKl4DGEdahj8iNks6esrsRZLWFtNrJS0uuS4APdbpOf+0iNhfTB+QNK2kegD0\nSdcX/CIiJEWjdtvLbQ/ZHjp8ZLjb1QEoSafhP2h7uiQVz4caLRgRqyJiMCIGp04Z6HB1AMrWafjX\nSVpaTC+V9Fw55QDol3a6+p6Q9KKkS2zvtb1M0gpJ19jeJel7xWsA40jL3+2PiCUNmq4uuZZxq9Xv\nz1fpC3PnNG3/y/rH+1QJ6oY7/ICkCD+QFOEHkiL8QFKEH0iK8ANJMUR3mza8t6Xj91764o+atp//\nwx0df7YkHbjj2w3bXv/5Q119Ns5c7PmBpAg/kBThB5Ii/EBShB9IivADSRF+ICn6+fugVT/+wKWX\nNG1fv/GpFmvo/B4E5MWeH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSop+/Blr34wPlY88PJEX4gaQI\nP5AU4QeSIvxAUoQfSIrwA0m1DL/tNbYP2d4+at69tvfZ3lI8Fva2TABla2fP/4ikBWPMfyAi5haP\n9eWWBaDXWoY/IjZLOtqHWgD0UTfn/Lfb3lqcFkwqrSIAfdFp+
FdKuljSXEn7Jd3XaEHby20P2R46\nfGS4w9UBKFtH4Y+IgxExHBGfSFotaV6TZVdFxGBEDE6dMtBpnQBK1lH4bU8f9fIGSdsbLQugnlp+\npdf2E5KuknSu7b2SfiXpKttzJYWkPZJu6WGNAHqgZfgjYskYsx/uQS0A+og7/ICkCD+QFOEHkiL8\nQFKEH0iK8ANJ8dPd6Kn/nDhedQlogD0/kBThB5Ii/EBShB9IivADSRF+ICnCDyRFP38NXHfe3MrW\n/b/vX960/R+rV3f1+T+5YH5X70fvsOcHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaTo5++DDe9tqWzd\nX3/g1qbt5/32n03bu70H4cMfNBzMSV/688tN37ty9lebti+ucLueCdjzA0kRfiApwg8kRfiBpAg/\nkBThB5Ii/EBSLfv5bc+U9KikaZJC0qqIeND2ZElPSZolaY+kGyPiv70rFZ3YdudDzRe4s9cVNO6L\nr/J3DNDenv+EpLsiYo6kKyTdZnuOpLslbYqI2ZI2Fa8BjBMtwx8R+yPitWL6mKSdkmZIWiRpbbHY\nWkmLe1UkgPJ9rnN+27MkXSbpJUnTImJ/0XRAI6cFAMaJtsNv+2xJT0u6IyI+GN0WEaGR6wFjvW+5\n7SHbQ4ePDHdVLIDytBV+2xM0EvzHIuKZYvZB29OL9umSDo313ohYFRGDETE4dcpAGTUDKEHL8Nu2\npIcl7YyI+0c1rZO0tJheKum58ssD0CvtfKX3Skk3S9pm+2S/zT2SVkj6k+1lkt6RdGNvSsSZqtVX\nnVt1BXbTVVjl16zromX4I+IFSW7QfHW55QDoF+7wA5Ii/EBShB9IivADSRF+ICnCDyTFT3ejtrq9\nDwDNsecHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaTo58e4xXfyu8OeH0iK8ANJEX4gKcIPJEX4gaQI\nP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5JqGX7bM23/3fYbtnfY/lkx/17b+2xv\nKR4Le18ugLK082MeJyTdFRGv2T5H0qu2NxZtD0TE73pXHoBeaRn+iNgvaX8xfcz2Tkkzel0YgN76\nXOf8tmdJukzSS8Ws221vtb3G9qQG71lue8j20OEjw10VC6A8bYff9tmSnpZ0R0R8IGmlpIslzdXI\nkcF9Y70vIlZFxGBEDE6dMlBCyQDK0Fb4bU/QSPAfi4hnJCkiDkbEcER8Imm1pHm9KxNA2dq52m9J\nD0vaGRH3j5o/fdRiN0jaXn55AHqlnav9V0q6WdI22yd/K/keSUtsz5UUkvZIuqUnFQLoiXau9r8g\nyWM0rS+/HAD9wh1+QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8k\nRfiBpBwR/VuZfVjSO6NmnSvp/b4V8PnUtba61iVRW6fKrO3CiJjazoJ9Df9pK7eHImKwsgKaqGtt\nda1LorZOVVUbh/1AUoQfSKrq8K+qeP3N1LW2utYlUVunKqmt0nN+ANWpes8PoCKVhN/2Attv2t5t\n++4qamjE9h7b24qRh4cqrmWN7UO2t4+aN9n2Rtu7iucxh0mrqLZajNzcZGTpSrdd3Ua87vthv+0B\nSf+WdI2kvZJekbQkIt7oayEN2N4jaTAiKu8Ttv0dScclPRoRXyvm/UbS0YhYUfzjnBQRv6hJbfdK\nOl71yM3FgDLTR48sLWmxpB+rwm3XpK4bVcF2q2LPP0/S7oh4OyI+kvSkpEUV1FF7EbFZ0tFTZi+S\ntLaYXquRP56+a1BbLUTE/oh4rZg+JunkyNKVbrsmdVWiivDPkPTuqNd7Va8hv0PS87Zftb286mLG\nMK0YNl2SDkiaVmUxY2g5cnM/nTKydG22XScjXpeNC36nmx8R35R0vaTbisPbWoqRc7Y6dde0NXJz\nv4wxsvSnqtx2nY54XbYqw
r9P0sxRr88v5tVCROwrng9Jelb1G3344MlBUovnQxXX86k6jdw81sjS\nqsG2q9OI11WE/xVJs21fZPssSTdJWldBHaexPbG4ECPbEyVdq/qNPrxO0tJieqmk5yqs5TPqMnJz\no5GlVfG2q92I1xHR94ekhRq54v+WpF9WUUODur4i6fXisaPq2iQ9oZHDwI81cm1kmaQpkjZJ2iXp\nb5Im16i2P0raJmmrRoI2vaLa5mvkkH6rpC3FY2HV265JXZVsN+7wA5Ligh+QFOEHkiL8QFKEH0iK\n8ANJEX4gKcIPJEX4gaT+D8kFqXGPpjBrAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADG9JREFUeJzt3X+o3XUdx/HXq+si0gI311pzNVtT\nmlYzLiNzhJHVHMKMQlxmi4ZXSkHDPxIjMuiPEWb0Rw1uOVplWqDmiJHNYYootqus/cxt2sStuc0t\ncAZRu777434Xt+2e7zk753vO99z7fj7gcL7n+/me+337xde+Pz7f8/04IgQgn7fUXQCAehB+ICnC\nDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJndXLlZ03fSDmzZ3Wy1UCqex75T967dioW1m2o/DbXirp\nR5IGJP0sIlaXLT9v7jT9+dG5nawSQInFn32l5WXbPuy3PSDpx5KukrRQ0grbC9v9ewB6q5Nz/sWS\n9kbESxHxb0kPSFpeTVkAuq2T8M+RNP4YY38x7//YHrI9YnvkyNHRDlYHoEpdv9ofEcMRMRgRgzNn\nDHR7dQBa1En4D0gaf/Xu/GIegEmgk/BvlrTA9gW23yrpOknrqykLQLe13dUXESds3yLpUY119a2N\niB2VVQagqzrq54+IDZI2VFQLgB7i9l4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8k\nRfiBpAg/kBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIP\nJEX4gaQIP5AU4QeS6miUXtv7JB2XNCrpREQMVlEUgO7rKPyFT0bEaxX8HQA9xGE/kFSn4Q9Jf7T9\nnO2hKgoC0BudHvYviYgDtt8laaPtv0bEk+MXKP5RGJKk986p4iwDQBU62vNHxIHi/bCkhyUtnmCZ\n4YgYjIjBmTMGOlkdgAq1HX7bZ9t+x8lpSZ+RtL2qwgB0VyfH4bMkPWz75N/5dUT8oZKqAHRd2+GP\niJckfaTCWrrqs+9ZVHcJ6LF937usYdsLX13Tw0r6E119QFKEH0iK8ANJEX4gKcIPJEX4gaTS3G97\n/V/3l7av3r60R5XgpLlf6O49YWd98PWGbRc/c33pd3dcdl/V5fQd9vxAUoQfSIrwA0kRfiApwg8k\nRfiBpAg/kFSafv4vv7P8AcNf/vivelRJLh+65+ttf3dg4YWl7aM7d5e2Z+ir7wR7fiApwg8kRfiB\npAg/kBThB5Ii/EBShB9IKk0//2T2u3+eU9q+ZsEHGrY9+vctVZdzRt5z99MN25rVtuzK8n5+dIY9\nP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8k1bSf3/ZaSVdLOhwRlxTzpkv6jaR5kvZJujYi/tG9Mqe2\nZVd8vrR9dPeLParkzDH0+eTVyp7/55JOHdHiDkmbImKBpE3FZwCTSNPwR8STko6dMnu5pHXF9Dp
J\n11RcF4Aua/ecf1ZEHCymX5U0q6J6APRIxxf8IiIkRaN220O2R2yPHDk62unqAFSk3fAfsj1bkor3\nw40WjIjhiBiMiMGZMwbaXB2AqrUb/vWSVhbTKyU9Uk05AHqlafht3y/pGUkX2d5ve5Wk1ZI+bXuP\npCuLzwAmkab9/BGxokHTpyquZdK6atkXS9vf3LKzyV/o3378ZVde22SJ8mfn1/08ATTGHX5AUoQf\nSIrwA0kRfiApwg8kRfiBpHh0d2H/iTdK21e9d0lJa3lX3t8e+HBp++5P/KK0vdnPZvc/eHFJa3lX\n2wW/v7G0/cKdm0vb529+W2k7+hd7fiApwg8kRfiBpAg/kBThB5Ii/EBShB9IKk0/f9P+7KHy/uyB\nBe9v2LbhiYearL2+n7U2u3+h6X/3hfNL238y58Ezrgn9gT0/kBThB5Ii/EBShB9IivADSRF+ICnC\nDyQ1Zfr5m/3m/UKV92d/Y++u0valb+/fR1Cf//kdDdtWqew5BM1t+BP9+FMVe34gKcIPJEX4gaQI\nP5AU4QeSIvxAUoQfSKppP7/ttZKulnQ4Ii4p5t0l6UZJR4rF7oyIDd0qsgqTeajogYsvKm0f3fFC\nw7byZ/pLOy67r62aprqL7v1aafu7nx0tbX9ieLjKcrqilT3/zyUtnWD+DyNiUfHq6+ADOF3T8EfE\nk5KO9aAWAD3UyTn/Lba32l5r+9zKKgLQE+2Gf42k+ZIWSToo6QeNFrQ9ZHvE9siRo+XnSQB6p63w\nR8ShiBiNiDcl/VTS4pJlhyNiMCIGZ84YaLdOABVrK/y2Z4/7+DlJ26spB0CvtNLVd7+kKySdZ3u/\npO9IusL2IkkhaZ+km7pYI4AuaBr+iFgxwex7u1BLRyZzP34zGzb+poNvT93t0omFT3+ptH3et58p\nbf/X1Q3PdCcN7vADkiL8QFKEH0iK8ANJEX4gKcIPJDVlHt0NnKrsce5zm9yX1uxn1JPhJ7vNsOcH\nkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaTo50ff2vDYb0vbmw3LfvTGyxq2jXx3TZO1T/2fQrPnB5Ii\n/EBShB9IivADSRF+ICnCDyRF+IGk6OfHpNX8ce1Tv6++E+z5gaQIP5AU4QeSIvxAUoQfSIrwA0kR\nfiCppuG3Pdf247Z32t5h+9Zi/nTbG23vKd7P7X65AKrSyp7/hKTbI2KhpI9Jutn2Qkl3SNoUEQsk\nbSo+A5gkmoY/Ig5GxPPF9HFJuyTNkbRc0rpisXWSrulWkQCqd0bn/LbnSbpU0rOSZkXEwaLpVUmz\nKq0MQFe1HH7b50h6UNJtEfH6+LaICEnR4HtDtkdsjxw5OtpRsQCq01L4bU/TWPDvi4iHitmHbM8u\n2mdLOjzRdyNiOCIGI2Jw5oyBKmoGUIFWrvZb0r2SdkXEPeOa1ktaWUyvlPRI9eUB6JZWftJ7uaQb\nJG2zffI3kndKWi3pt7ZXSXpZ0rXdKRFANzQNf0Q8JckNmj9VbTkAeoU7/ICkCD+QFOEHkiL8QFKE\nH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBS\nhB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJNQ2/7bm2H7e90/YO27cW8++yfcD2\nluK1rPvlAqjKWS0sc0LS7RHxvO13SHrO9sai7YcRcXf3ygPQLU3DHxEHJR0spo/b3iVpTrcLA9Bd\nZ3TOb3uepEslPVvMusX2VttrbZ/b4DtDtkdsjxw5OtpRsQCq03L4bZ8j6UFJt0XE65LWSJovaZHG\njgx+MNH3ImI4IgYjYnDmjIEKSgZQhZbCb3uaxoJ/X0Q8JEkRcSgiRiPiTUk/lbS4e2UCqForV/st\n6V5JuyLinnHzZ49b7HOStldfHoBuaeVq/+WSbpC0zfaWYt6
dklbYXiQpJO2TdFNXKgTQFa1c7X9K\nkido2lB9OQB6hTv8gKQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9I\nivADSTkiercy+4ikl8fNOk/Saz0r4Mz0a239WpdEbe2qsrb3RcTMVhbsafhPW7k9EhGDtRVQol9r\n69e6JGprV121cdgPJEX4gaTqDv9wzesv06+19WtdErW1q5baaj3nB1Cfuvf8AGpSS/htL7X9gu29\ntu+oo4ZGbO+zva0YeXik5lrW2j5se/u4edNtb7S9p3ifcJi0mmrri5GbS0aWrnXb9duI1z0/7Lc9\nIGm3pE9L2i9ps6QVEbGzp4U0YHufpMGIqL1P2PYnJL0h6RcRcUkx7/uSjkXE6uIfznMj4pt9Uttd\nkt6oe+TmYkCZ2eNHlpZ0jaSvqMZtV1LXtaphu9Wx518saW9EvBQR/5b0gKTlNdTR9yLiSUnHTpm9\nXNK6Ynqdxv7n6bkGtfWFiDgYEc8X08clnRxZutZtV1JXLeoI/xxJr4z7vF/9NeR3SPqj7edsD9Vd\nzARmFcOmS9KrkmbVWcwEmo7c3EunjCzdN9uunRGvq8YFv9MtiYiPSrpK0s3F4W1firFztn7qrmlp\n5OZemWBk6f+pc9u1O+J11eoI/wFJc8d9Pr+Y1xci4kDxfljSw+q/0YcPnRwktXg/XHM9/9NPIzdP\nNLK0+mDb9dOI13WEf7OkBbYvsP1WSddJWl9DHaexfXZxIUa2z5b0GfXf6MPrJa0spldKeqTGWv5P\nv4zc3GhkadW87fpuxOuI6PlL0jKNXfF/UdK36qihQV3vl/SX4rWj7tok3a+xw8D/aOzayCpJMyRt\nkrRH0mOSpvdRbb+UtE3SVo0FbXZNtS3R2CH9VklbiteyurddSV21bDfu8AOS4oIfkBThB5Ii/EBS\nhB9IivADSRF+ICnCDyRF+IGk/gsKoNZjuCPnqwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADEJJREFUeJzt3X/oXfV9x/Hne1/tQO3AxCxLY7ak\nRYVMbBxfwjpd6eiq0Ra03bAK6zKQRkaFdpQxcX/MP2Wslf2xCekMTUdnLVhRZvBHpeC6dtWvovHX\nbGxIMTGaGAdVuq2avvfH96T7Vr/3R+4995779f18wOWe+/mce887h7y+59zzufd+IjORVM+vdF2A\npG4Yfqkowy8VZfilogy/VJThl4oy/FJRhl8qyvBLRZ0yzY2dtWouN244dZqblEo58OKbvPra8Rhm\n3bHCHxHbgL8H5oB/ysyb+62/ccOpPHL/hnE2KamPrZe+OPS6I5/2R8Qc8A/AZcBm4JqI2Dzq60ma\nrnHe828FXsjM/Zn5M+AbwBXtlCVp0sYJ/3pg6TnGwabtl0TEjohYiIiFo8eOj7E5SW2a+NX+zNyZ\nmfOZOb9m9dykNydpSOOE/xCw9Ord2U2bpBVgnPA/CpwTEZsi4j3A1cA97ZQladJGHurLzLci4nrg\nfhaH+nZl5jOtVSZposYa58/MPcCelmqRNEV+vFcqyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGG\nXyrK8EtFGX6pKMMvFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0UZfqkowy8VZfilogy/VJThl4oy/FJR\nhl8qyvBLRRl+qSjDLxVl+KWixpqlNyIOAK8Dx4G3MnO+jaIkTd5Y4W/8QWa+2sLrSJoiT/ulosYN\nfwIPRMRjEbGjjYIkTce4p/0XZ+ahiPh14MGI+M/MfHjpCs0fhR0Av7m+jXcZktow1pE/Mw8190eA\nu4Cty6yzMzPnM3N+zeq5cTYnqUUjhz8iTo+I955YBi4Bnm6rMEmTNc55+Frgrog48Tr/kpn3tVKV\npIkbOfyZuR/4YIu1SJoih/qkogy/VJThl4oy/FJRhl8qyvBLRfl523e56w5+qG//f9x+4US3/+Rf\n/uNEX1+j88gvFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0U5zr8CXLT3U337z9i2v0/vf/d97m/wvREq\nGt6lt2zp2XfK2ev7PvfeR+5tuxwt4ZFfKsrwS0UZfqkowy8VZfilogy/VJThl4pynH8GXPq+3mPh\nAGfQbxwf5jaf27Nvz7e/OVJNbfnQk3/Us+/XLvtR3+cO2i/3v/TESDVpkUd+qSjDLxVl+KWiDL9U\nlOGXijL8UlGGXypq4Dh/ROwCPgEcyczzm7ZVwB3ARuAAcFVm/tfkylzZBo1XDzJ4PHt2x7u//8E7\ne3e+1P+54+439TfMkf+rwLa3td0APJSZ5wAPNY8lrSADw5+ZDwOvva35CmB3s7wbuLLluiRN2Kjv\n+ddm5uFm+WVgbUv1SJqSsS/4ZWYC2as/InZExEJELBw9dnzczUlqyajhfyUi1gE090d6rZiZOzNz\nPjPn16yeG3Fzkto2avjvAbY3y9uBu9spR9K0DAx/RNwOfB84LyIORsS1wM3AxyJiH/CHzWNJK8jA\ncf7MvKZH10dbrmXFOu/f/rRv/0b29u33e+nqgp/wk4oy/FJRhl8qyvBLRRl+qSjDLxXlT3e3YOOn\n+w/lvfwXvzfgFWoO9W3618/27T+XR6dUSU0e+aWiDL9UlOGXijL8UlGGXyrK8EtFGX6pKMf5p+B/\nVvX8lbPSzt3Rfxz/z/e9MKVKavLILxVl+KWiDL9UlOGXijL8UlGGXyrK8EtFOc4/Bc9fe2vXJXRm\n8/f+pGffBp7u+9wrT3+j7XK0hEd+qSjDLxVl+KWiDL9UlOGXi
jL8UlGGXypq4Dh/ROwCPgEcyczz\nm7abgM8CR5vVbszMPZMqcqU79+H+U3j/8MNfm1Il7Rv0b9t0de85Dd647/0DXr3mfAbTMsyR/6vA\ntmXab8nMLc3N4EsrzMDwZ+bDwGtTqEXSFI3znv/6iNgbEbsi4szWKpI0FaOG/1bgA8AW4DDwpV4r\nRsSOiFiIiIWjx46PuDlJbRsp/Jn5SmYez8yfA18BtvZZd2dmzmfm/JrVc6PWKallI4U/ItYtefhJ\nGPD1LEkzZ5ihvtuBjwBnRcRB4G+Aj0TEFiCBA8B1E6xR0gQMDH9mXrNM820TqGXFOnDHBX37N326\n91g3wMfP/njf/nsfufekaxpWv+/bA2z44/4ndZvo/2/rt2+ev2Dlfr7h3cBP+ElFGX6pKMMvFWX4\npaIMv1SU4ZeK8qe7W/D87/cfsrrovk/17T9j2/6+/Ze+b8tJ1zSsQT+fPffb5/Xt3/PgHQO24Ndy\nZ5VHfqkowy8VZfilogy/VJThl4oy/FJRhl8qynH+Kfj3C77Vf4WX+nc/8NNT+/b/NH/1JCv6f6fF\n//btv+Q0x+nfrTzyS0UZfqkowy8VZfilogy/VJThl4oy/FJRjvOvAJec9uaANQb1S+/kkV8qyvBL\nRRl+qSjDLxVl+KWiDL9UlOGXihoY/ojYEBHfiYhnI+KZiPh8074qIh6MiH3N/ZmTL1dSW4Y58r8F\nfDEzNwO/C3wuIjYDNwAPZeY5wEPNY0krxMDwZ+bhzHy8WX4deA5YD1wB7G5W2w1cOakiJbXvpN7z\nR8RG4ELgB8DazDzcdL0MrG21MkkTNXT4I+IM4E7gC5n5k6V9mZlA9njejohYiIiFo8eOj1WspPYM\nFf6IOJXF4H89M0/8GuUrEbGu6V8HHFnuuZm5MzPnM3N+zeq5NmqW1IJhrvYHcBvwXGZ+eUnXPcD2\nZnk7cHf75UmalGG+0nsR8BngqYg48TvONwI3A9+MiGuBHwNXTaZESZMwMPyZ+V0genR/tN1yJE2L\nn/CTijL8UlGGXyrK8EtFGX6pKMMvFWX4paIMv1SU4ZeKMvxSUYZfKsrwS0UZfqkowy8VZfilogy/\nVJThl4oy/FJRhl8qyvBLRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGGXyrK8EtFGX6pKMMvFTUw/BGx\nISK+ExHPRsQzEfH5pv2miDgUEU80t8snX66ktpwyxDpvAV/MzMcj4r3AYxHxYNN3S2b+3eTKkzQp\nA8OfmYeBw83y6xHxHLB+0oVJmqyTes8fERuBC4EfNE3XR8TeiNgVEWf2eM6OiFiIiIWjx46PVayk\n9gwd/og4A7gT+EJm/gS4FfgAsIXFM4MvLfe8zNyZmfOZOb9m9VwLJUtqw1Dhj4hTWQz+1zPzWwCZ\n+UpmHs/MnwNfAbZOrkxJbRvman8AtwHPZeaXl7SvW7LaJ4Gn2y9P0qQMc7X/IuAzwFMR8UTTdiNw\nTURsARI4AFw3kQolTcQwV/u/C8QyXXvaL0fStPgJP6kowy8VZfilogy/VJThl4oy/FJRhl8qyvBL\nRRl+qSjDLxVl+KWiDL9UlOGXijL8UlGRmdPbWMRR4MdLms4CXp1aASdnVmub1brA2kbVZm2/lZlr\nhllxquF/x8YjFjJzvrMC+pjV2ma1LrC2UXVVm6f9UlGGXyqq6/Dv7Hj7/cxqbbNaF1jbqDqprdP3\n/JK60/WRX1JHOgl/RGyLiOcj4oWIuKGLGnqJiAMR8VQz8/BCx7XsiogjEfH0krZVEfFgROxr7ped\nJq2j2mZi5uY+M0t3uu9mbcbrqZ/2R8Qc8EPgY8BB4FHgmsx8dqqF9BARB4D5zOx8TDgiPgy8AXwt\nM89v2v4WeC0zb27+cJ6ZmX81I7XdBLzR9czNzYQy65bOLA1cCfwZHe67PnVdRQf7rYsj/1bghczc\nn5k/A74BXNFBHTMvMx8GX
ntb8xXA7mZ5N4v/eaauR20zITMPZ+bjzfLrwImZpTvdd33q6kQX4V8P\nvLjk8UFma8rvBB6IiMciYkfXxSxjbTNtOsDLwNoui1nGwJmbp+ltM0vPzL4bZcbrtnnB750uzszf\nAS4DPtec3s6kXHzPNkvDNUPN3Dwty8ws/Qtd7rtRZ7xuWxfhPwRsWPL47KZtJmTmoeb+CHAXszf7\n8CsnJklt7o90XM8vzNLMzcvNLM0M7LtZmvG6i/A/CpwTEZsi4j3A1cA9HdTxDhFxenMhhog4HbiE\n2Zt9+B5ge7O8Hbi7w1p+yazM3NxrZmk63nczN+N1Zk79BlzO4hX/HwF/3UUNPep6P/Bkc3um69qA\n21k8DXyTxWsj1wKrgYeAfcC3gVUzVNs/A08Be1kM2rqOaruYxVP6vcATze3yrvddn7o62W9+wk8q\nygt+UlGGXyrK8EtFGX6pKMMvFWX4paIMv1SU4ZeK+j/3M70Cl9MZ3AAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAC6lJREFUeJzt3V2MXHd5gPHn7WKK5HARO65lHINp\n6iK5kWqqxeIjqkB8OEQIG1FFsVTqSlGNWiIVCVVE6QW5jCog4qJEMsTCQAgghTS+sDDBQqSofGQT\nGSdOShMi09hxbMdGIukNZHl7scfRkuzOTGbOzBnv+/yk1c6eM7vzauTHZ2bO7P4jM5FUzx91PYCk\nbhi/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0W9ZpI3dsWamdy8adUkb1Iq5cTTv+O5C/MxyHVH\nij8irgW+AMwAX87M23pdf/OmVfzs8KZRblJSD9t3PD3wdYd+2B8RM8C/Ax8EtgK7I2LrsD9P0mSN\n8px/O/BkZj6Vmb8FvgnsbGcsSeM2SvwbgcWPMU422/5AROyNiLmImDt3fn6Em5PUprG/2p+Z+zJz\nNjNn162dGffNSRrQKPGfAha/endls03SJWCU+B8EtkTEmyPitcANwMF2xpI0bkOf6svMFyPiJuAw\nC6f69mfm8dYmkzRWI53nz8xDwKGWZpE0Qb69VyrK+KWijF8qyvilooxfKsr4paKMXyrK+KWijF8q\nyvilooxfKsr4paKMXyrK+KWijF8qyvilooxfKsr4paKMXyrK+KWijF8qyvilooxfKsr4paKMXyrK\n+KWijF8qyvilooxfKmqkVXoj4gTwPDAPvJiZs20MtdLseMO2nvsPP3N0QpNM3tb/+ttl9236m0d7\nfu9Kvl+mwUjxN96Tmc+18HMkTZAP+6WiRo0/ge9FxEMRsbeNgSRNxqgP+6/JzFMR8SfA/RHx35n5\nwOIrNP8p7AV448Y2nmVIasNIR/7MPNV8PgvcC2xf4jr7MnM2M2fXrZ0Z5eYktWjo+CNidUS8/uJl\n4ANA75dvJU2NUR6HrwfujYiLP+cbmfndVqaSNHZDx5+ZTwF/2eIskibIU31SUcYvFWX8UlHGLxVl\n/FJRxi8V5fttW/CW//y7nvs3c2xCk0yfx9759WX37aD3rzprvDzyS0UZv1SU8UtFGb9UlPFLRRm/\nVJTxS0V5nr8Fr/vJZV2PsCL9x//1vl93rX5hQpOsTB75paKMXyrK+KWijF8qyvilooxfKsr4paI8\nz6+pdceWP+u5f5dLeI/EI79UlPFLRRm/VJTxS0UZv1SU8UtFGb9
UVN/z/BGxH/gQcDYzr262rQG+\nBWwGTgDXZ+avxzemVqKDpx7suf/DG982oUlqGuTI/xXg2pdtuxk4kplbgCPN15IuIX3jz8wHgAsv\n27wTONBcPgDsankuSWM27HP+9Zl5urn8LLC+pXkkTcjIL/hlZgK53P6I2BsRcxExd+78/Kg3J6kl\nw8Z/JiI2ADSfzy53xczcl5mzmTm7bu3MkDcnqW3Dxn8Q2NNc3gPc1844kialb/wRcTfwY+AtEXEy\nIm4EbgPeHxFPAO9rvpZ0Cel7nj8zdy+z670tz3LJ+vm/fLHn/h23uw79Uv44Vo30/f906u09939x\n409G+vkrne/wk4oyfqko45eKMn6pKOOXijJ+qSj/dPcU2PEGTwUO44f/e1XvK3iqryeP/FJRxi8V\nZfxSUcYvFWX8UlHGLxVl/FJRnuefAjNb/7y7G4/o7rb7mD/+i65HWNE88ktFGb9UlPFLRRm/VJTx\nS0UZv1SU8UtFeZ5/Chz6/re7HmEq+XcOxssjv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1RU3/P8EbEf\n+BBwNjOvbrbdCvwDcK652i2ZeWhcQ650/c5nH37m6IQmUSWDHPm/Aly7xPbbM3Nb82H40iWmb/yZ\n+QBwYQKzSJqgUZ7z3xQRxyJif0Rc3tpEkiZi2PjvAK4CtgGngc8td8WI2BsRcxExd+78/JA3J6lt\nQ8WfmWcycz4zfw98Cdje47r7MnM2M2fXrZ0Zdk5JLRsq/ojYsOjLjwCPtjOOpEkZ5FTf3cC7gSsi\n4iTwGeDdEbENSOAE8PExzihpDPrGn5m7l9h85xhmWbFO3vMXPfdf+dHjE5pkuoz6+/rH33FXS5PU\n5Dv8pKKMXyrK+KWijF8qyvilooxfKso/3a2p9Y9PPNn1CCuaR36pKOOXijJ+qSjjl4oyfqko45eK\nMn6pKM/zT0C/Xz3dgUtRa/I88ktFGb9UlPFLRRm/VJTxS0UZv1SU8UtFeZ5fnen3J813rXZp8nHy\nyC8VZfxSUcYvFWX8UlHGLxVl/FJRxi8V1fc8f0RsAr4KrAcS2JeZX4iINcC3gM3ACeD6zPz1+EbV\nSuMS290a5Mj/IvCpzNwKvB34RERsBW4GjmTmFuBI87WkS0Tf+DPzdGY+3Fx+Hngc2AjsBA40VzsA\n7BrXkJLa96qe80fEZuCtwE+B9Zl5utn1LAtPCyRdIgaOPyIuA+4BPpmZv1m8LzOThdcDlvq+vREx\nFxFz587PjzSspPYMFH9ErGIh/Lsy8zvN5jMRsaHZvwE4u9T3Zua+zJzNzNl1a2famFlSC/rGHxEB\n3Ak8npmfX7TrILCnubwHuK/98SSNyyC/0vsu4GPAIxFx8XcsbwFuA74dETcCvwKuH8+IK9/hZ/zV\nVU1e3/gz80dALLP7ve2OI2lSfIefVJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU8UtFGb9U\nlPFLRRm/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU8UtFGb9UlPFLRRm/VJTxS0UZv1SU\n8UtFGb9UlPFLRfWNPyI2RcQPIuKxiDgeEf/cbL81Ik5FxNHm47rxjyupLa8Z4DovAp/KzIcj4vXA\nQxFxf7Pv9sz87PjGkzQufePPzNPA6eby8xHxOLBx3INJGq9X9Zw/IjYDbwV+2my6KSKORcT+iLh8\nme/ZGxFzETF37vz8SMNKas/A8UfEZcA9wCcz8zfAHcBVwDYWHhl8bqnvy8x9mTmbmbPr1s60MLKk\nNgwUf0SsYiH8uzLzOwCZeSYz5zPz98CXgO3jG1NS2wZ5tT+AO4HHM/Pzi7ZvWHS1jwCPtj+epHEZ\n5NX+dwEfAx6JiKPNtluA3RGxDUjgBPDxsUwoaSwGebX/R0AssetQ++NImhTf4ScVZfxSUcYvFWX8\nUlHGLxVl/FJRxi8VZfxSUcY
vFWX8UlHGLxVl/FJRxi8VZfxSUZGZk7uxiHPArxZtugJ4bmIDvDrT\nOtu0zgXONqw2Z3tTZq4b5IoTjf8VNx4xl5mznQ3Qw7TONq1zgbMNq6vZfNgvFWX8UlFdx7+v49vv\nZVpnm9a5wNmG1clsnT7nl9Sdro/8kjrSSfwRcW1E/CIinoyIm7uYYTkRcSIiHmlWHp7reJb9EXE2\nIh5dtG1NRNwfEU80n5dcJq2j2aZi5eYeK0t3et9N24rXE3/YHxEzwP8A7wdOAg8CuzPzsYkOsoyI\nOAHMZmbn54Qj4q+BF4CvZubVzbZ/Ay5k5m3Nf5yXZ+anp2S2W4EXul65uVlQZsPilaWBXcDf0+F9\n12Ou6+ngfuviyL8deDIzn8rM3wLfBHZ2MMfUy8wHgAsv27wTONBcPsDCP56JW2a2qZCZpzPz4eby\n88DFlaU7ve96zNWJLuLfCDy96OuTTNeS3wl8LyIeioi9XQ+zhPXNsukAzwLruxxmCX1Xbp6kl60s\nPTX33TArXrfNF/xe6ZrM/Cvgg8Anmoe3UykXnrNN0+magVZunpQlVpZ+SZf33bArXreti/hPAZsW\nfX1ls20qZOap5vNZ4F6mb/XhMxcXSW0+n+14npdM08rNS60szRTcd9O04nUX8T8IbImIN0fEa4Eb\ngIMdzPEKEbG6eSGGiFgNfIDpW334ILCnubwHuK/DWf7AtKzcvNzK0nR8303diteZOfEP4DoWXvH/\nJfCvXcywzFx/Cvy8+Tje9WzA3Sw8DPwdC6+N3AisBY4ATwDfB9ZM0WxfAx4BjrEQ2oaOZruGhYf0\nx4Cjzcd1Xd93Pebq5H7zHX5SUb7gJxVl/FJRxi8VZfxSUcYvFWX8UlHGLxVl/FJR/w84hqDlbTB+\n5wAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADR5JREFUeJzt3V+MHfV5xvHnyWIa1U4kbFzXfzY1\nJCaVi4TTbq2IoCRVCsYIyU4rufFF6kqIjZoglYqSIveiXKIEQrhIUJdiYSpCiJQgrNaKIVYVGjVN\nvFDbQFzAcTfC/42NBFaagDdvL3YMC94z5/icOTNn/X4/0mrnzG/mzKvRPjtz5ndmfo4IAcjnfU0X\nAKAZhB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFIX1bmxS+cPxfLhOXVuEkhl4pW39OqpSXey\nbE/ht329pPskDUn654i4q2z55cNz9NMdw71sEkCJ1Wte6XjZrk/7bQ9J+oaktZJWStpoe2W37weg\nXr185l8taX9EHIiINyV9W9K6asoC0G+9hH+ppOnnGAeLee9ie9T2uO3xEycne9gcgCr1/Wp/RIxF\nxEhEjCxcMNTvzQHoUC/hPyRp+tW7ZcU8ALNAL+HfJWmF7ctsXyzpc5K2VVMWgH7ruqsvIs7YvkXS\nDk119W2JiBcqq+wCsmbJqtL2HYd311QJ8I6e+vkjYruk7RXVAqBGfL0XSIrwA0kRfiApwg8kRfiB\npAg/kFSt9/NjZp8aHS1t/+HYWE2VIBOO/EBShB9IivADSRF+ICnCDyRF+IGk6OqrweG/u7q0ffjB\nfTVVAryDIz+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEU/fw2u3vDfpe0Td/9fafvBM6dL25ddNO+8\nawI48gNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUj3189uekPSGpElJZyJipIqiLjT/tOzHpe1rVD6E\n900fuqbKcgbGbfvLR3S/7rffqqmSnKr4ks+fRMSrFbwPgBpx2g8k1Wv4Q9KTtp+xXT7sDICB0utp\n/zURccj270h6yvb/RMTT0xco/imMStKHlnIrATAoejryR8Sh4vdxSY9LWj3DMmMRMRIRIwsXDPWy\nOQAV6jr8tufa/sDZaUnXSXq+qsIA9Fcv5+GLJD1u++z7fCsivl9JVQD6ruvwR8QBSVdVWEtaOw7v\nbrqErj3z6zdL2zdfds4nwbfd85E/KF33ly/vL21fP7f8OQcoR1cfkBThB5Ii/EBShB9IivADSRF+\nICm+b4ue/NFvXVzaXtaNuWZJ+a3M96/4SGn7+lncRToIOPIDSRF+ICnCDyRF+IGkCD+QFOEHkiL8\nQFL086Mxoy8dKG0fu+LymirJiSM/kBThB5Ii/EBShB9IivADSRF+ICnCDyRFP3+HnvzlnJZt7R5B\njf5o9zyAfprNj1s/iyM/kBThB5Ii/EBShB9IivADSRF+ICnCDyTVtp/f9hZJN0o6HhFXFvPmS3pM\n0nJJE5I2RMRr/SuzeV9f92clrS/WVsdMyu6Lb3dP/F+3GQYbF65OjvwPSbr+PfPukLQzIlZI2lm8\nBjCLtA1/RDwt6dR7Zq+TtLWY3ippfcV1Aeizbj/zL4qII8X0UUmLKqoHQE16vuAXESEpWrXbHrU9\nbnv8xMnJXjcHoCLdhv+Y7cWSVPw+3mrBiBiLiJGIGFm4YKjLzQGoWrfh3yZpUzG9SdIT1ZQDoC5t\nw2/7UUk/lvRR2wdt3yTpLknX2n5Z0p8WrwHMIm37+SNiY4umz1Rcy0Db/tRjTZfQlbE27evnnq6l\nDgwevuEHJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivAD\nSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJ
EX4gaQIP5AU4QeSIvxAUoQfSKrt\nEN22t0i6UdLxiLiymHenpJslnSgW2xwR2/tVJMp99D/+smXbcu0tXXfNklU9bfvorVeXtu/58jd7\nen/0TydH/ockXT/D/HsjYlXxQ/CBWaZt+CPiaUmnaqgFQI16+cx/i+29trfYvqSyigDUotvw3y/p\nw5JWSToi6Z5WC9oetT1ue/zEyckuNwegal2FPyKORcRkRPxG0gOSVpcsOxYRIxExsnDBULd1AqhY\nV+G3vXjay89Ker6acgDUpZOuvkclfVrSpbYPSvpHSZ+2vUpSSJqQ9IU+1gigD9qGPyI2zjD7wT7U\ngi69/7/mdb3u0b8t76dv53fv/c/S9jVfb/09gouWLild99920YPcT3zDD0iK8ANJEX4gKcIPJEX4\ngaQIP5BU264+DL49t5fcNnt7u7V397bxNu9/w7V/0bLtzAsv9rZt9IQjP5AU4QeSIvxAUoQfSIrw\nA0kRfiApwg8kRT8/+uq1u8+0bPvg2hoLwTk48gNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUvTzoydr\n1870ZPd3fHDPvpZtp79/eZt37/FZAyjFkR9IivADSRF+ICnCDyRF+IGkCD+QFOEHkmrbz297WNLD\nkhZJCkljEXGf7fmSHpO0XNKEpA0R8Vr/SkU3Hn790tL2R35/WY9baN2PL0k7Dpf11dOP36ROjvxn\nJN0WESslfVzSl2yvlHSHpJ0RsULSzuI1gFmibfgj4khEPFtMv6Gpf/VLJa2TtLVYbKuk9f0qEkD1\nzuszv+3lkj4m6SeSFkXEkaLpqKY+FgCYJToOv+15kr4r6daIeH16W0SEpq4HzLTeqO1x2+MnTk72\nVCyA6nQUfttzNBX8RyLie8XsY7YXF+2LJR2fad2IGIuIkYgYWbhgqIqaAVSgbfhtW9KDkvZFxNem\nNW2TtKmY3iTpierLA9AvndzS+wlJn5f0nO2zfTObJd0l6Tu2b5L0C0kb+lMi2lmzZFXf3nv0pQOl\n7X8+7/XSdgyutuGPiB9Jcovmz1RbDoC68A0/ICnCDyRF+IGkCD+QFOEHkiL8QFI8uvsCUHbb7GX/\nenPpuleM7iptH7ui/PHaY6Wt0tDKK1q2bf/Bd9qsjX7iyA8kRfiBpAg/kBThB5Ii/EBShB9IivAD\nSdHPf4H73xsfKF/gcG/v/8VDHy9t//kfv9SyrdfnEBy99erS9j1f/mZP73+h48gPJEX4gaQIP5AU\n4QeSIvxAUoQfSIrwA0l5aqSteoxc9f746Y7h2raHwXbVV79Y2r70ofLhvydf635E+PKhw2ev1Wte\n0fieX7V61P67cOQHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaTa3s9ve1jSw5IWSQpJYxFxn+07Jd0s\n6USx6OaI2N6vQnHh2XN7m/vtb+/t/cueF/Cp0dHSdX841m5Egtmvk4d5nJF0W0Q8a/sDkp6x/VTR\ndm9E3N2/8gD0S9vwR8QRSUeK6Tds75O0tN+FAeiv8/rMb3u5pI9J+kkx6xbbe21vsX1Ji3VGbY/b\nHj9xcrKnYgFUp+Pw254n6buSbo2I1yXdL+nDklZp6szgnpnWi4ixiBiJiJGFC4YqKBlAFToKv+05\nmgr+IxHxPUmKiGMRMRkRv5H0gKTV/SsTQNXaht+2JT0oaV9EfG3a/MXTFvuspOerLw9Av3Rytf8T\nkj4v6TnbZ++D3Cxpo+1Vmur+m5D0hb5UCHSp/LbdC/OW3vPRydX+H0ma6f5g+vSBWYxv+AFJEX4g\nKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5KqdYhu2yck/WLarEsl\nvVpbAednUGsb1LokautWlbX9XkQs7GTBWsN/zsbt8YgYaayAEoNa26DWJVFbt5qqjdN+ICnCDyTV\ndPgHeUykQa1tUOuSqK1bj
dTW6Gd+AM1p+sgPoCGNhN/29bZftL3f9h1N1NCK7Qnbz9nebXu84Vq2\n2D5u+/lp8+bbfsr2y8XvGYdJa6i2O20fKvbdbts3NFTbsO1/t/0z2y/Y/ptifqP7rqSuRvZb7af9\ntockvSTpWkkHJe2StDEiflZrIS3YnpA0EhGN9wnb/qSk05Iejogri3lfkXQqIu4q/nFeEhF/PyC1\n3SnpdNMjNxcDyiyePrK0pPWS/koN7ruSujaogf3WxJF/taT9EXEgIt6U9G1J6xqoY+BFxNOSTr1n\n9jpJW4vprZr646ldi9oGQkQciYhni+k3JJ0dWbrRfVdSVyOaCP9SSa9Me31QgzXkd0h60vYztkeb\nLmYGi4ph0yXpqKRFTRYzg7YjN9fpPSNLD8y+62bE66pxwe9c10TEH0paK+lLxentQIqpz2yD1F3T\n0cjNdZlhZOm3Nbnvuh3xumpNhP+QpOFpr5cV8wZCRBwqfh+X9LgGb/ThY2cHSS1+H2+4nrcN0sjN\nM40srQHYd4M04nUT4d8laYXty2xfLOlzkrY1UMc5bM8tLsTI9lxJ12nwRh/eJmlTMb1J0hMN1vIu\ngzJyc6uRpdXwvhu4Ea8jovYfSTdo6or/zyX9QxM1tKjrckl7ip8Xmq5N0qOaOg18S1PXRm6StEDS\nTkkvS/qBpPkDVNu/SHpO0l5NBW1xQ7Vdo6lT+r2aGo53d/E31+i+K6mrkf3GN/yApLjgByRF+IGk\nCD+QFOEHkiL8QFKEH0iK8ANJEX4gqf8H8Gf+Q+zn8YwAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAACwhJREFUeJzt3V2IXPUZx/Hfr9vYUvUib023MW2s\nhEIQGssQCkqx+BZDIXoj5kJSKl0vFBREKvaiXoZSlRaqsNZgWqwiqJiLUI1BCNK3bCTmtTVWVky6\nZtekYLypZn16sScyJruz48w5c058vh8YZuac2T0PQ76Z1+TviBCAfL5U9wAA6kH8QFLEDyRF/EBS\nxA8kRfxAUsQPJEX8QFLEDyT15UEebMmioVi5YsEgDwmkMv7ux3r/5LS7uW1f8dteJ+k3koYk/T4i\nNne6/coVC/SPl1b0c0gAHay94d2ub9vz037bQ5J+J+lGSaslbbS9utffB2Cw+nnNv1bSWxHxdkR8\nJOkZSRvKGQtA1fqJf7mk9ucYR4ttn2F7xPaY7bGpE9N9HA5AmSp/tz8iRiOiFRGtpYuHqj4cgC71\nE/8xSe3v3l1SbANwHugn/t2SVtm+1PYFkm6VtK2csQBUreeP+iLitO27JL2kmY/6tkTEwdImA1Cp\nvj7nj4jtkraXNAuAAeLrvUBSxA8kRfxAUsQPJEX8QFLEDyRF/EBSxA8kRfxAUsQPJEX8QFLEDyRF\n/EBSxA8kRfxAUsQPJEX8QFLEDyRF/EBSxA8kRfxAUsQPJEX8QFLEDyRF/EBSxA8kRfxAUsQPJEX8\nQFJ9rdJre1zSKUnTkk5HRKuMoQBUr6/4Cz+KiPdL+D0ABoin/UBS/cYfkl62vcf2SBkDARiMfp/2\nXxURx2x/XdIO2/+MiF3tNyj+UhiRpG8tL+NVBoAy9PXIHxHHivNJSS9IWjvLbUYjohURraWLh/o5\nHIAS9Ry/7QttX3zmsqTrJR0oazAA1ernefgySS/YPvN7/hQRfy5lKgCV6zn+iHh
b0vdKnAXAAPFR\nH5AU8QNJET+QFPEDSRE/kBTxA0nxfdsG2PO/jzru/+lv7+m4/437Hi1zHCTBIz+QFPEDSRE/kBTx\nA0kRP5AU8QNJET+QFJ/zN8D4x0s67v/GI3/p/AvuK3EYpMEjP5AU8QNJET+QFPEDSRE/kBTxA0kR\nP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+Q1Lzx295ie9L2gbZti2zvsH2kOF9Y7ZhfbEP+\npOMJqEI3j/xPSlp31rb7Je2MiFWSdhbXAZxH5o0/InZJOnnW5g2SthaXt0q6qeS5AFSs19f8yyJi\norj8nqRlJc0DYED6fsMvIkJSzLXf9ojtMdtjUyem+z0cgJL0Gv9x28OSVJxPznXDiBiNiFZEtJYu\nHurxcADK1mv82yRtKi5vkvRiOeMAGJRuPup7WtJfJX3X9lHbt0vaLOk620ckXVtcB3Aemff/7Y+I\njXPsuqbkWQAMEN/wA5IifiAp4geSIn4gKeIHkiJ+ICniB5IifiAp4geSIn4gKeIHkiJ+ICniB5Ii\nfiCpef9JL3Jbf+0tHfdPH3qz4/6X/rO3zHFQIh75gaSIH0iK+IGkiB9IiviBpIgfSIr4gaSIH0iK\n+IGkiB9IiviBpIgfSIr4gaSIH0iK+IGk5v33/La3SPqxpMmIuLzY9qCkn0maKm72QERsr2pI1Ge+\nf6+P81c3j/xPSlo3y/ZHImJNcSJ84Dwzb/wRsUvSyQHMAmCA+nnNf5ftfba32F5Y2kQABqLX+B+T\ndJmkNZImJD001w1tj9gesz02dWK6x8MBKFtP8UfE8YiYjohPJD0uaW2H245GRCsiWksXD/U6J4CS\n9RS/7eG2qzdLOlDOOAAGpZuP+p6WdLWkJbaPSvqlpKttr5EUksYl3VHhjAAqMG/8EbFxls1PVDAL\nenTDN9fUPQLOQ3zDD0iK+IGkiB9IiviBpIgfSIr4gaRYovsLoMplsPtdohvNxSM/kBTxA0kRP5AU\n8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/kBTxA0kRP5AU8QNJET+QFPEDSRE/kBTx\nA0kRP5AU8QNJET+Q1Lzx215h+1Xbh2wftH13sX2R7R22jxTnC6sfF0BZunnkPy3p3ohYLekHku60\nvVrS/ZJ2RsQqSTuL6wDOE/PGHxETEfF6cfmUpMOSlkvaIGlrcbOtkm6qakgA5ftcr/ltr5R0haS/\nS1oWERPFrvckLSt1MgCV6jp+2xdJek7SPRHxQfu+iAhJMcfPjdgesz02dWK6r2EBlKer+G0v0Ez4\nT0XE88Xm47aHi/3DkiZn+9mIGI2IVkS0li4eKmNmACXo5t1+S3pC0uGIeLht1zZJm4rLmyS9WP54\nAKrSzRLdV0q6TdJ+22fWgn5A0mZJz9q+XdI7kjqv5QygUeaNPyJek+Q5dl9T7jgABoVv+AFJET+Q\nFPEDSRE/kBTxA0kRP5BUN5/zo2I3fu2/Hfe/vPurA5oEmfDIDyRF/EBSxA8kRfxAUsQPJEX8QFLE\nDyTF5/wN8BUv6Lj/0eV/G9Ak59r+yrO1HRvV4pEfSIr4gaSIH0iK+IGkiB9IiviBpIgfSIr4gaSI\nH0iK+IGkiB9IiviBpIgfSIr4gaSIH0hq3vhtr7D9qu1Dtg/avrvY/qDtY7b3Fqf11Y8LoCzd/Gce\npyXdGxGv275Y0h7bO4p9j0TEr6sbD0BV5o0/IiYkTRSXT9k+LGl51YMBqNbnes1ve6WkKyT9vdh0\nl+19trfYXjjHz4zYHrM9NnViuq9hAZSn6/htXyTpOUn3RMQHkh6TdJmkNZp5ZvDQbD8XEaMR0YqI\n1tLFQyWMDKAMXcVve4Fmwn8qIp6XpIg4HhHTEfGJpMclra1uTABl6+bdfkt6QtLhiHi4bftw281u\nlnSg/PEAVKWbd/uvlHSbpP229xbbHpC00fY
aSSFpXNIdlUwIoBLdvNv/miTPsmt7+eMAGBS+4Qck\nRfxAUsQPJEX8QFLEDyRF/EBSxA8kRfxAUsQPJEX8QFLEDyRF/EBSxA8kRfxAUo6IwR3MnpL0Ttum\nJZLeH9gAn09TZ2vqXBKz9arM2b4dEUu7ueFA4z/n4PZYRLRqG6CDps7W1LkkZutVXbPxtB9IiviB\npOqOf7Tm43fS1NmaOpfEbL2qZbZaX/MDqE/dj/wAalJL/LbX2f6X7bds31/HDHOxPW57f7Hy8FjN\ns2yxPWn7QNu2RbZ32D5SnM+6TFpNszVi5eYOK0vXet81bcXrgT/ttz0k6U1J10k6Kmm3pI0RcWig\ng8zB9rikVkTU/pmw7R9K+lDSHyLi8mLbrySdjIjNxV+cCyPi5w2Z7UFJH9a9cnOxoMxw+8rSkm6S\n9BPVeN91mOsW1XC/1fHIv1bSWxHxdkR8JOkZSRtqmKPxImKXpJNnbd4gaWtxeatm/vAM3ByzNUJE\nTETE68XlU5LOrCxd633XYa5a1BH/cknvtl0/qmYt+R2SXra9x/ZI3cPMYlmxbLokvSdpWZ3DzGLe\nlZsH6ayVpRtz3/Wy4nXZeMPvXFdFxPcl3SjpzuLpbSPFzGu2Jn1c09XKzYMyy8rSn6rzvut1xeuy\n1RH/MUkr2q5fUmxrhIg4VpxPSnpBzVt9+PiZRVKL88ma5/lUk1Zunm1laTXgvmvSitd1xL9b0irb\nl9q+QNKtkrbVMMc5bF9YvBEj2xdKul7NW314m6RNxeVNkl6scZbPaMrKzXOtLK2a77vGrXgdEQM/\nSVqvmXf8/y3pF3XMMMdc35H0RnE6WPdskp7WzNPAjzXz3sjtkhZL2inpiKRXJC1q0Gx/lLRf0j7N\nhDZc02xXaeYp/T5Je4vT+rrvuw5z1XK/8Q0/ICne8AOSIn4gKeIHkiJ+ICniB5IifiAp4geSIn4g\nqf8DR6qDR/5YFEoAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "import skimage\n", "from skimage import transform\n", "for nn in range(10):\n", " CharacterNum = np.random.randint(len(imagedata))\n", " FileNum = 7\n", " im = imagedata[CharacterNum][FileNum]\n", " im = skimage.transform.resize(im, (28,28))\n", " plt.figure(); plt.imshow(im)\n", "\n", "print(\"Displayed.\")\n", "im.dtype\n", "\n" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([1, 64, 15, 15]) torch.Size([1, 64, 7, 7]) torch.Size([1, 64, 3, 3]) torch.Size([1, 64, 1, 1])\n" ] } ], "source": [ "# Let us compute the final size of various possible architectures and input image sizes!\n", "import torch\n", "from torch import autograd\n", "\n", "cv1 = torch.nn.Conv2d(1, 64, 3, stride=2)\n", "#mp1 = torch.nn.MaxPool2d(2, stride=2)\n", "cv2 = torch.nn.Conv2d(64, 64, 3, 
stride=2)\n", "#mp2 = torch.nn.MaxPool2d(2, stride=2)\n", "cv3 = torch.nn.Conv2d(64, 64, 3, stride=2)\n", "cv4 = torch.nn.Conv2d(64, 64, 3, stride=2)\n", "mp3 = torch.nn.MaxPool2d(2, stride=1)\n", "\n", "fakeim = autograd.Variable(torch.randn(1, 1, 31, 31))\n", "outcv1 = cv1(fakeim)\n", "outcv2 = cv2(outcv1)\n", "outcv3 = cv3(outcv2)\n", "outcv4 = cv4(outcv3)\n", "#outmp3 = mp3(outcv3)\n", "print(outcv1.size(), outcv2.size(), outcv3.size(), outcv4.size())\n" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "17 17 [ 152 75 1047] [0 3 0]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.5/dist-packages/skimage/transform/_warps.py:84: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15.\n", " warn(\"The default mode, 'constant', will be changed to 'reflect' in \"\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADtlJREFUeJzt3X/sVfV9x/HXy68wNzAtv8YQcaC1\nLqaraL6lOpmzNgqlTdBuMZqtcwkpjamZZm0icXG6NVns5o80TeeKhZUuTuuqVv4gQ2ZZjKkDvyry\nQ+pQihVEQKwVTUrL1/f+uIfsjnzP/V7uPfeeL76fj4R8z/e8z7nnnRNe33Pv+dzPvY4IAcjnpLob\nAFAPwg8kRfiBpAg/kBThB5Ii/EBShB9IivADSRF+IKmTu9nZ9kJJ35A0IOk7EXFHq+2nTh6I2bPG\ndXNIAC3seu3XevOtYbezbcfhtz0g6VuSLpe0W9IztldHxItl+8yeNU4b187q9JAARjFvwWttb9vN\n0/55kl6OiJ0R8StJD0pa3MXjAeijbsI/U1Lzn5ndxToAJ4Ce3/CzvdT2kO2hAweHe304AG3qJvx7\nJDW/gD+9WPf/RMTyiBiMiMFpUwa6OByAKnUT/mcknW17ju3xkq6RtLqatgD0Wsd3+yPiiO0bJK1V\nY6hvZURsq6wzAD3V1Th/RKyRtKaiXgD0Ee/wA5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEH\nkiL8QFKEH0iK8ANJdTWxByemr+y9oLT2o90fbbnv8594sOp2UBOu/EBShB9IivADSRF+ICnCDyRF\n+IGkGOpL6PEHLiytnXbnj1vv/HrFzYxi0flXlNaG9+3v6DFPOuWU0tqhz55XWnvqm9/u6HhjFVd+\nICnCDyRF+IGkCD+QFOEHkiL8QFJdDfXZ3iXpkKRhSUciYrCKptBb4w9F3S20bc3zj5fWrtyxoLR2\n+MuTSmvvb/1JaW3CwxtKawsenltaW/v6ptLaWFXFOP+nIuLNCh4HQB/xtB9Iqtvwh6THbT9re2kV\nDQHoj26f9s+PiD22f1vSOts/iYgnmzco/igslaQzZvJuYmCs6OrKHxF7ip/7JT0qad4I2yyPiMGI\nGJw2ZaCbwwGoUMfhtz3B9qlHlyVdIWlrVY0B6K1unodPl/So7aOP828R8R+VdIWemvrtp0trO775\nyVH2HjtDWj88e215sXyEsGMLTisf6mtVu2zLe6W1m6fs6KqnbnQc/ojYKal8/iOAMY2hPiApwg8k\nRfiBpAg/kBThB5LiLXcfUJ+9eHGL6qullZ1//MH6kMoqtZq512qo70e/P6G0dnOfPxC1GVd+ICnC\nDyRF+IGkCD+QFOEHkiL8QFIM9Z3AlvxsfmntyE/Lh/MAiSs/kBbhB5Ii/EBShB9IivADSRF+ICmG\n+o5Tq9ly/R9ee7fyR2w1O61XDi65qLT2w9v+sbR2+skTe9FOR77202dKa7fO+UQfO2kfV34gKcIP\nJEX4gaQIP5AU4QeSIvxAUo6I1hvYKyV9TtL+iPhYsW6ypO9Lmi1pl6SrI+Lnox1s8LxTYuPaWV22\njKNaDcsd/kz58NJ/rbivF+201IshxLf/vHyIcP3ff6O09lsnja+8l7Fi3oLXNPTCL93Otu1c+b8r\naeEx65ZJeiIizpb0RPE7gBPIqOGPiCclvXXM6sWSVhXLqyRdWXFfAHqs09f80yNib7H8hhpf1w3g\nBNL1Db9o3DQovXFge6ntIdtDBw4Od3s4ABXpNPz7bM+QpOLn/rINI2J5RAxGxOC0KQMdHg5A1ToN\n/2pJ1xXL10l6rJp2APTLqLP6bD8g6VJJU23vlnSbpDskPWR7iRpf/HZ1L5vE8atjOK+VVt9z18o5\nK64vrc2+9enS2lXfm1d5Lx80o4Y/Iq4tKX264l4A9BHv8AOSI
vxAUoQfSIrwA0kRfiApPsBzjKvj\nAzXHkpeW3FteXFJeanXeWtUyDQNy5QeSIvxAUoQfSIrwA0kRfiApwg8kxVDfGPD5ly9vUT1QWnn/\nj85vsV+eIauR/OJPLyytfej+/+5jJ2MXV34gKcIPJEX4gaQIP5AU4QeSIvxAUgz1jQHvXVI+nNfK\nugf+peJOkAlXfiApwg8kRfiBpAg/kBThB5Ii/EBS7XxX30pJn5O0PyI+Vqy7XdIX9X9Tzm6JiDW9\navJEseilRaW14U+93tFj9vsDJbv5wNBMH375QdDOlf+7khaOsP6eiJhb/EsffOBEM2r4I+JJSW/1\noRcAfdTNa/4bbG+2vdL2pMo6AtAXnYb/XklnSZoraa+ku8o2tL3U9pDtoQMHhzs8HICqdRT+iNgX\nEcMR8b6k+yTNa7Ht8ogYjIjBaVMGOu0TQMU6Cr/tGU2/XiVpazXtAOiXdob6HpB0qaSptndLuk3S\npbbnSgpJuyR9qYc9AuiBUcMfEdeOsHpFD3o54XU6lv/6V/+gRbW/Y+ete5FOu/PHpbWx9AWYfELv\n6HiHH5AU4QeSIvxAUoQfSIrwA0kRfiApPr33OC2c88kW1cOllR3fKt9v51X/1EVH1dryV617OXvq\n9aW1M5c9XXU7Lc3f/PnS2gTtLK0x9biBKz+QFOEHkiL8QFKEH0iK8ANJEX4gKYb6jlMcLh/O27Os\nfEbcWBrO68ZJc97r6/FW/OJ3SmsTFpYP5w2c85EWj8pQn8SVH0iL8ANJEX4gKcIPJEX4gaQIP5AU\nQ30jOGdF+cy12Sqfubb1Lz8Yw3mtvPSH3yutLTr36tLagtPKH3PX1y4qrc2+tbOZgmvW/6Cj/TLh\nyg8kRfiBpAg/kBThB5Ii/EBSo4bf9izb622/aHub7RuL9ZNtr7O9o/g5qfftAqiKI6L1Bo1v5J0R\nEc/ZPlXSs5KulPQXkt6KiDtsL5M0KSJubvVYg+edEhvXzqqm8y499O6HSmsrPjqno8fM/sGQPzvy\nbmnti2fMr/x42c/3SOYteE1DL/zS7Ww76pU/IvZGxHPF8iFJ2yXNlLRY0qpis1Vq/EEAcII4rtf8\ntmdLOl/SBknTI2JvUXpD0vRKOwPQU22H3/ZESQ9Luiki3mmuReO1w4ivH2wvtT1ke+jAweGumgVQ\nnbbCb3ucGsG/PyIeKVbvK+4HHL0vsH+kfSNieUQMRsTgtCkDVfQMoALt3O23pBWStkfE3U2l1ZKu\nK5avk/RY9e0B6JV2JvZcLOkLkrbYPnp79RZJd0h6yPYSSa9KKp/VAWDMGTX8EfGUpLKhg09X207/\nMJxXvTNOnlha47yNPbzDD0iK8ANJEX4gKcIPJEX4gaQIP5BU2g/wZOgJ2XHlB5Ii/EBShB9IivAD\nSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrw\nA0m18y29s2yvt/2i7W22byzW3257j+1Nxb9FvW8XQFXa+fTeI5K+EhHP2T5V0rO21xW1eyLizt61\nB6BX2vmW3r2S9hbLh2xvlzSz140B6K3jes1ve7ak8yVtKFbdYHuz7ZW2J5Xss9T2kO2hAweHu2oW\nQHXaDr/tiZIelnRTRLwj6V5JZ0maq8Yzg7tG2i8ilkfEYEQMTpsyUEHLAKrQVvhtj1Mj+PdHxCOS\nFBH7ImI4It6XdJ+keb1rE0DV2rnbb0krJG2PiLub1s9o2uwqSVurbw9Ar7Rzt/9iSV+QtMX20S+4\nu0XStbbnSgpJuyR9qScdAuiJdu72PyXJI5TWVN8OgH7hHX5AUoQfSIrwA0kRfiApwg8kRfiBpAg/\nkBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQI\nP5AU4QeSIvxAUoQfSKqdL
+o8xfZG2y/Y3mb7b4v1c2xvsP2y7e/bHt/7dgFUpZ0r/2FJl0XEeZLm\nSlpo+0JJX5d0T0R8RNLPJS3pXZsAqjZq+KPh3eLXccW/kHSZpB8U61dJurInHQLoibZe89seKL6e\ne7+kdZJekfR2RBwpNtktaWbJvkttD9keOnBwuIqeAVSgrfBHxHBEzJV0uqR5kn6v3QNExPKIGIyI\nwWlTBjpsE0DVjutuf0S8LWm9pIskfdj2yUXpdEl7Ku4NQA+1c7d/mu0PF8u/KelySdvV+CPwJ8Vm\n10l6rFdNAqieI6L1BvbH1bihN6DGH4uHIuLvbJ8p6UFJkyU9L+nPIuLwKI91QNKrxa9TJb3ZXfuV\nGkv90MvI6GVkzb38bkRMa2enUcPfK7aHImKwloOPYCz1Qy8jo5eRddoL7/ADkiL8QFJ1hn95jcce\nyVjqh15GRi8j66iX2l7zA6gXT/uBpGoJv+2Ftl8qZgQuq6OHpl522d5ie5PtoT4fe6Xt/ba3Nq2b\nbHud7R3Fz0k19nK77T3Fudlke1Gfeplle73tF4uZpDcW6/t+blr00vdzU/kM24jo6z813i/wiqQz\nJY2X9IKkc/vdR1M/uyRNrenYl0i6QNLWpnX/IGlZsbxM0tdr7OV2SV+t4bzMkHRBsXyqpP+RdG4d\n56ZFL30/N5IsaWKxPE7SBkkXSnpI0jXF+n+WdH07j1fHlX+epJcjYmdE/EqNNwotrqGP2kXEk5Le\nOmb1YjXeVCX1cbZkSS+1iIi9EfFcsXxIjXeUzlQN56ZFL30XDZXNsK0j/DMlvdb0e+mMwD4JSY/b\nftb20hr7OGp6ROwtlt+QNL3OZiTdYHtz8bKgLy9BmtmeLel8Na5ytZ6bY3qRajg33cywPRY3/KT5\nEXGBpM9I+rLtS+pu6KhoPI+rczjmXklnqfEhLnsl3dXPg9ueKOlhSTdFxDvNtX6fmxF6qeXcRBcz\nbI9VR/j3SJrV9HutMwIjYk/xc7+kR9U4oXXaZ3uGJBU/99fVSETsK/6zvS/pPvXx3Ngep0bY7o+I\nR4rVtZybkXqp89wUx+96hm0d4X9G0tnFHcrxkq6RtLqGPmR7gu1Tjy5LukLS1tZ79dxqNWZJSjXP\nljwatMJV6tO5sW1JKyRtj4i7m0p9PzdlvdRxbiqfYdvPu5VNdy0XqXHX9BVJf11HD0UfZ6ox2vCC\npG397kXSA2o8Zfy1Gq/VlkiaIukJSTsk/aekyTX28q+StkjarEbwZvSpl/lqPKXfLGlT8W9RHeem\nRS99PzeSPq7GDNrNavyx+Zum/8cbJb0s6d8l/UY7j8c7/ICkuOEHJEX4gaQIP5AU4QeSIvxAUoQf\nSIrwA0kRfiCp/wWs7N+iHs8dGwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADL5JREFUeJzt3X+o3fV9x/Hna2ncVnXUxCxkMVuq\nk65SbJRLaFFKf8yaZoMoG0VhxYGQMiYodLDQweb2lx1Ttz+KI06pDGfrVsXARJuJIHZDvboYo1nn\nD9JpGpMYK7r+0db43h/nG7gL9+Ye7/mecxI/zwcc7vd8f5zviy/3db/n+/2e7z2pKiS15xemHUDS\ndFh+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRn1olIWTbAL+DlgG/ENV3XSi+c9esazWr1s+\nyiolncC+V3/OG28ezTDzLrn8SZYB3wQuA14Dnkqyo6peWGiZ9euW8+TD65a6SkmL2Hj5q0PPO8rb\n/o3AS1X1SlX9DPg2sGWE15M0QaOUfy0w98/Ma904SaeAsZ/wS7I1yWyS2cNHjo57dZKGNEr59wNz\nD+DP6cb9P1W1vapmqmpm1cplI6xOUp9GKf9TwPlJPprkNOAqYEc/sSSN25LP9lfVu0muAx5mcKnv\nzqp6vrdkksZqpOv8VfUg8GBPWSRNkJ/wkxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl\n+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTl\nlxpl+aVGjfR1XUn2Ae8AR4F3q2qmj1CSxm+k8nc+V1Vv9PA6kibIt/1So0YtfwHfS/J0kq19BJI0\nGaO+7b+0qvYn+VVgZ5L/qqrH5s7Q/VHYCvDra/s4ypDUh5H2/FW1v/t5CLgf2DjPPNuraqaqZlat\nXDbK6iT1aMnlT3J6kjOPDQNfBPb0FUzSeI3yPnw1cH+SY6/zT1X1UC+pJI3dkstfVa8An+wxi6QJ\n8lKf1CjLLzXK8kuNsvxSoyy/1CjLLzXK8kuNsvxSoyy/1CjLLzXK8kuNsvxSo5r97xqX/9qGaUfQ\nEP52378vOO3jp314gkk+eNzzS42y/FKjLL/UKMsvNcryS42y/FKjmr3U95OHzp12BHVO3/TKtCM0\nyT2/1CjLLzXK8kuNsvxSoyy/1CjLLzVq0Ut9Se4Efhc4VFWf6MatAL4DrAf2AV+uqh+PL2b/Hr/w\nvmlHUOdyvMNyGobZ838L2HTcuG3AI1V1PvBI91zSKWTR8lfVY8Cbx43eAtzVDd8FXNFzLkljttRj\n/tVVdaAbfp3B13VLOoWMfMKvqgqohaYn2ZpkNsns4SNHR12dpJ4stfwHk6wB6H4eWmjGqtpeVTNV\nNbNq5bIlrk5S35Za/h3ANd3wNcAD/cSRNCmLlj/JPcB/AB9L8lqSa4GbgMuSvAj8dvdc0ilk0ev8\nVXX1ApO+0HMWSRPkJ/ykRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5Zca\n1ex39WmyvvnWuiUt9/HTPtxzEh3jnl9qlOWXGmX5pUZZfqlRll9qlOWXGuWlPk3EjgtWTjuCjuOe\nX2qU5ZcaZfmlRll+qVGWX2qU5ZcaNcx39d2Z5FCSPXPG3Zhkf5Jd3WPzeGNK6tswe/5vAZvmGX9r\nVW3oHg/2G0vSuC1a/qp6DHhzAlkkTdAox/zXJdndHRac1VsiSROx1PLfBpwHbAAOADcvNGOSrUlm\nk8wePnJ0iauT1Lcllb+qDlbV0ap6D7gd2HiCebdX1UxVzaxauWypOSX1bEnlT7JmztMrgT0LzSvp\n5LToXX1J7gE+C5yd5DXgL4DPJtkAFLAP+OoYM0oag0XLX1VXz
zP6jjFkkTRBfsJPapTllxpl+aVG\nWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl\n+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxq1aPmTrEvyaJIXkjyf5Ppu/IokO5O82P08a/xx\nJfVl0e/qA94FvlZVzyQ5E3g6yU7gD4FHquqmJNuAbcCfji/q/D797O8tOO0TK15fcNrt674/jjjS\nKWPRPX9VHaiqZ7rhd4C9wFpgC3BXN9tdwBXjCimpf+/rmD/JeuAi4AlgdVUd6Ca9DqzuNZmksRq6\n/EnOAL4L3FBVb8+dVlUF1ALLbU0ym2T28JGjI4WV1J+hyp9kOYPi311V93WjDyZZ001fAxyab9mq\n2l5VM1U1s2rlsj4yS+rBMGf7A9wB7K2qW+ZM2gFc0w1fAzzQfzxJ4zLM2f5LgK8AzyXZ1Y37OnAT\ncG+Sa4EfAl8eT0RJ47Bo+avqcSALTP5Cv3Hev1/50ssLTvufEyx3ORv6D/MB8JOHzl3yso9feN/i\nM+mk4Sf8pEZZfqlRll9qlOWXGmX5pUZZfqlRw1znP6k9/KNdC07b/IPNE0wyHkc/96OJru/0Ta8s\neVkvn55a3PNLjbL8UqMsv9Qoyy81yvJLjbL8UqNO+Ut9J/Lgxx6cdoTRTfZK39j8ziVbFpz2r9/3\nX0FMg3t+qVGWX2qU5ZcaZfmlRll+qVGWX2rUB/pSn04eXs47+bjnlxpl+aVGWX6pUZZfapTllxpl\n+aVGDfMtveuSPJrkhSTPJ7m+G39jkv1JdnWPU/+/ZUoNGeY6/7vA16rqmSRnAk8n2dlNu7Wq/mZ8\n8SSNyzDf0nsAONANv5NkL7B23MEkjdf7OuZPsh64CHiiG3Vdkt1J7kxy1gLLbE0ym2T28JGjI4WV\n1J+hy5/kDOC7wA1V9TZwG3AesIHBO4Ob51uuqrZX1UxVzaxauayHyJL6MFT5kyxnUPy7q+o+gKo6\nWFVHq+o94HZg4/hiSurbMGf7A9wB7K2qW+aMXzNntiuBPf3HkzQuw5ztvwT4CvBckmNfjPd14Ook\nG4AC9gFfHUtCSWMxzNn+x4HMM+kD8K9xpXb5CT+pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVG\nWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl\n+aVGWX6pUZZfatQwX9T5S0meTPJskueT/GU3/qNJnkjyUpLvJDlt/HEl9WWYPf9Pgc9X1SeBDcCm\nJJ8CvgHcWlW/CfwYuHZ8MSX1bdHy18D/dk+Xd48CPg/8Szf+LuCKsSSUNBZDHfMnWdZ9PfchYCfw\nMvBWVb3bzfIasHaBZbcmmU0ye/jI0T4yS+rBUOWvqqNVtQE4B9gI/NawK6iq7VU1U1Uzq1YuW2JM\nSX17X2f7q+ot4FHg08BHknyom3QOsL/nbJLGaJiz/auSfKQb/mXgMmAvgz8Cv9/Ndg3wwLhCSupf\nqurEMyQXMjiht4zBH4t7q+qvkpwLfBtYAfwn8AdV9dNFXusw8MPu6dnAG6PF79XJlMcs8zPL/OZm\n+Y2qWjXMQouWf1ySzFbVzFRWPo+TKY9Z5meW+S01i5/wkxpl+aVGTbP826e47vmcTHnMMj+zzG9J\nWaZ2zC9punzbLzVqKuVPsinJD7o7ArdNI8OcLPuSPJdkV5LZCa/7ziSHkuyZM25Fkp1JXux+njXF\nLDcm2d9tm11JNk8oy7okjyZ5obuT9Ppu/MS3zQmyTHzb9H6HbVVN9MHg8wIvA+cCpwHPAhdMOsec\nPPuAs6e07s8AFwN75oz7a2BbN7wN+MYUs9wI/MkUtssa4OJu+Ezgv4ELprFtTpBl4tsGCHBGN7wc\neAL4FHAvcFU3/u+BPxrm9
aax598IvFRVr1TVzxh8UGjLFHJMXVU9Brx53OgtDD5UBRO8W3KBLFNR\nVQeq6plu+B0GnyhdyxS2zQmyTFwN9HaH7TTKvxZ4dc7zBe8InJACvpfk6SRbp5jjmNVVdaAbfh1Y\nPc0wwHVJdneHBRM5BJkryXrgIgZ7ualum+OywBS2zSh32B7PE35waVVdDHwJ+OMkn5l2oGNq8D5u\nmpdjbgPOY/BPXA4AN09y5UnOAL4L3FBVb8+dNultM0+WqWybGuEO2+NNo/z7gXVznk/1jsCq2t/9\nPATcz2CDTtPBJGsAup+HphWkqg52v2zvAbczwW2TZDmDst1dVfd1o6eybebLMs1t061/5Dtsp1H+\np4DzuzOUpwFXATumkIMkpyc589gw8EVgz4mXGrsdDO6ShCnfLXmsaJ0rmdC2SRLgDmBvVd0yZ9LE\nt81CWaaxbXq/w3aSZyvnnLXczOCs6cvAn00jQ5fjXAZXG54Fnp90FuAeBm8Zf87gWO1aYCXwCPAi\n8G/Aiilm+UfgOWA3g+KtmVCWSxm8pd8N7Ooem6exbU6QZeLbBriQwR20uxn8sfnzOb/HTwIvAf8M\n/OIwr+cn/KRGecJPapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUf8HFoc1jdcuwZUAAAAASUVO\nRK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADllJREFUeJzt3XGMHOV5x/Hv06shKSAFG2M5xtRA\nIQ1KwaZXiwgakZCA60Y1VBWFNimtKI5akIKU/oGo2tBUQiQqoFRtiUxwIBEhIQWEVbkFaiFR1BZz\nUDAGl2I7RtgY25ikkD9C8PH0j52TrtbN3rI7u2P7/X6k087OO7PzMPi3szvvvDuRmUgqz8+1XYCk\ndhh+qVCGXyqU4ZcKZfilQhl+qVCGXyqU4ZcKZfilQv38ICtHxArg68AY8M3MvLnb8ifMHcsli+cM\nsklJXex49V3eeHMyelm27/BHxBjw98BngJ3AUxGxLjNfrFtnyeI5bHx4cb+blDSL5Re/2vOyg3zs\nXw5szcztmfkz4HvAqgFeT9IIDRL+RcD0t5md1TxJh4Ghn/CLiNURMRERE/v2Tw57c5J6NEj4dwHT\nv8CfVM37fzJzTWaOZ+b4/HljA2xOUpMGCf9TwOkRcUpEHAVcDqxrpixJw9b32f7MPBAR1wIP0+nq\nW5uZLzRWmaShGqifPzPXA+sbqkXSCHmFn1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qo\nwy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEM\nv1Qowy8VaqDbdUXEDuBtYBI4kJnjTRQlafgGCn/lk5n5RgOvI2mE/NgvFWrQ8CfwSEQ8HRGrmyhI\n0mgM+rH//MzcFREnAo9GxH9n5uPTF6jeFFYDnLyoiW8Zkpow0JE/M3dVj3uBB4HlMyyzJjPHM3N8\n/ryxQTYnqUF9hz8ijomI46amgYuAzU0VJmm4BvkcvgB4MCKmXue7mfkvjVQlaej6Dn9mbgfObrAW\nSSNkV59UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKh/HUNNWblSytr\n2xZ+8K3atjtPfmIY5WgWHvmlQhl+qVCGXyqU4
ZcKZfilQhl+qVB29R0Cdh74SW3bVSef3/j2Hn7t\n2b7XvfjDS7u0vlbbsrPLWhed94e1bY/84K7ZSlKfPPJLhTL8UqEMv1Qowy8VyvBLhTL8UqFm7eqL\niLXAZ4G9mfmxat5c4PvAEmAHcFlm/mh4ZR7Z+u3O69Zl161L7iP/9ge1bS/9+rf7qgXgIxNz6tt+\n4fXatnVn1v93TOZ7tW1j4bFrEL3svbuAFQfNux7YkJmnAxuq55IOI7OGPzMfB948aPYq4O5q+m7g\nkobrkjRk/X5uWpCZu6vp1+ncrlvSYWTgL02ZmUDWtUfE6oiYiIiJffsnB92cpIb0G/49EbEQoHrc\nW7dgZq7JzPHMHJ8/b6zPzUlqWr/hXwdcWU1fCTzUTDmSRqWXrr57gQuAEyJiJ/Bl4Gbgvoi4CngF\nuGyYRQ5D99Fp/Ylf+5XatveOqv/UE3QZZbfhpC5b7G903pLf3VTbdsqaq7uuewZP1bb97Yfr27pZ\nx7zatpWLzqlt63d0YrdRlDft+XRt2z8s+s++tneomjX8mXlFTdOFDdciaYS8SkIqlOGXCmX4pUIZ\nfqlQhl8qVLE/4PlbL+7va731F51V23bgqedr217+1q/Wtn1064n12/voP/VWWEPOWN29u27/H3+8\nS2v/Pww6St1HUf60vqn+90kPSx75pUIZfqlQhl8qlOGXCmX4pUIZfqlQxXb1XfOhV/tbb2P9et1G\nCp7xR0/Xtq0f4N55dbZ/t76WU3+v/+1NfOX2vtcdpQuuqh+deHSXkYnbv3b4d2X2yiO/VCjDLxXK\n8EuFMvxSoQy/VCjDLxWq2K6+I12/3Xn9/ijmIHb8dX332pK/+I++XvPof67vzhubP7+27eXPHR5d\nmU3wyC8VyvBLhTL8UqEMv1Qowy8VyvBLhYrOHba7LBCxFvgssDczP1bNuxG4GthXLXZDZq6fbWPj\nZ38gNz68eKCC2/ab562qbTvww1dq29roQjsSDOOeikfy/4vlF7/KxHM/jV6W7eXIfxewYob5t2Xm\n0upv1uBLOrTMGv7MfBx4cwS1SBqhQb7zXxsRmyJibUQc31hFkkai3/DfDpwGLAV2A7fULRgRqyNi\nIiIm9u2f7HNzkprWV/gzc09mTmbme8AdwPIuy67JzPHMHJ8/b6zfOiU1rK/wR8TCaU8vBTY3U46k\nUZl1VF9E3AtcAJwQETuBLwMXRMRSIIEdwBeGWKOkIZg1/Jl5xQyz7xxCLUe0YfRXq96nN7/ddgmH\nPK/wkwpl+KVCGX6pUIZfKpThlwpl+KVC+eu979PZD/ywtu3pZfXvpUfyMFIdnjzyS4Uy/FKhDL9U\nKMMvFcrwS4Uy/FKh7Op7n25asKm27WIcuafDh0d+qVCGXyqU4ZcKZfilQhl+qVCGXyqUXX0jcv6m\n365te+KsB0ZYidThkV8qlOGXCmX4pUIZfqlQhl8q1Kzhj4jFEfFYRLwYES9ExBer+XMj4tGIeLl6\nPH745UpqSi9H/gPAlzLzTOBc4JqIOBO4HtiQmacDG6rnkg4Ts4Y/M3dn5jPV9NvAFmARsAq4u1rs\nbuCSYRUpqXnv6zt/RCwBlgFPAgsyc3fV9DqwoNHKJA1Vz+GPiGOB+4HrMvOt6W2ZmUDWrLc6IiYi\nYmLf/smBipXUnJ7CHxFz6AT/nsycuhZ1T0QsrNoXAntnWjcz12TmeGaOz5831kTNkhrQy9n+AO4E\ntmTmrdOa1gFXVtNXAg81X56kYellYM95wOeB5yNi6p5TNwA3A/dFxFXAK8BlwylR0jDMGv7MfAKI\nmuYLmy3nyHXMiu31ja+Nrg5pilf4SYUy/FKhDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKh\nDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFcrwS4Uy\n/FKhDL9Uq
Fnv1RcRi4FvAwuABNZk5tcj4kbgamBftegNmbl+WIUeKlacPN6l9UBty7Z7lnVZ79ku\nbdJw9HKX3gPAlzLzmYg4Dng6Ih6t2m7LzL8ZXnmShqWXu/TuBnZX029HxBZg0bALkzRc7+s7f0Qs\nAZYBT1azro2ITRGxNiKOr1lndURMRMTEvv2TAxUrqTk9hz8ijgXuB67LzLeA24HTgKV0PhncMtN6\nmbkmM8czc3z+vLEGSpbUhJ7CHxFz6AT/nsx8ACAz92TmZGa+B9wBLB9emZKaNmv4IyKAO4EtmXnr\ntPkLpy12KbC5+fIkDUsvZ/vPAz4PPB8RU31SNwBXRMRSOt1/O4AvDKXCFiy76U9r20488O+1bf/7\n++fWtm395DcGqklqWi9n+58AYoamI75PXzqSeYWfVCjDLxXK8EuFMvxSoQy/VKheuvqOSBvfebe2\n7cS/q+/O2/qd+tF52y60O0+HD4/8UqEMv1Qowy8VyvBLhTL8UqEMv1SoYrv6Xjsw4w8PARBzjqpt\n23bht4ZRjjRyHvmlQhl+qVCGXyqU4ZcKZfilQhl+qVDFdvVdcsxP6tte2TjCSqR2eOSXCmX4pUIZ\nfqlQhl8qlOGXCmX4pUL1cqPOD0TExoh4LiJeiIi/quafEhFPRsTWiPh+RNQPhZN0yOnlyP8O8KnM\nPBtYCqyIiHOBrwK3ZeYvAT8CrhpemZKaNmv4s2Pqipg51V8CnwL+sZp/N3DJUCqUNBQ9feePiLHq\n9tx7gUeBbcCPM/NAtchOYFHNuqsjYiIiJvbtn2yiZkkN6Cn8mTmZmUuBk4DlwC/3uoHMXJOZ45k5\nPn/eWJ9lSmra+zrbn5k/Bh4DPg58KCKmxgacBOxquDZJQ9TL2f75EfGhavqDwGeALXTeBH6nWuxK\n4KFhFSmpeZGZ3ReIOIvOCb0xOm8W92XmVyLiVOB7wFzgv4DPZeY7s7zWPuCV6ukJwBuDld+oQ6ke\na5mZtcxsei2/mJnze1lp1vAPS0RMZOZ4KxufwaFUj7XMzFpm1m8tXuEnFcrwS4VqM/xrWtz2TA6l\neqxlZtYys75qae07v6R2+bFfKlQr4Y+IFRHxUjUi8Po2aphWy46IeD4ino2IiRFve21E7I2IzdPm\nzY2IRyPi5eqx/qaCw6/lxojYVe2bZyNi5YhqWRwRj0XEi9VI0i9W80e+b7rUMvJ90/gI28wc6R+d\n6wW2AacCRwHPAWeOuo5p9ewATmhp258AzgE2T5v3NeD6avp64Kst1nIj8Gct7JeFwDnV9HHA/wBn\ntrFvutQy8n0DBHBsNT0HeBI4F7gPuLya/w3gT3p5vTaO/MuBrZm5PTN/RudCoVUt1NG6zHwcePOg\n2avoXFQFIxwtWVNLKzJzd2Y+U02/TeeK0kW0sG+61DJy2dHYCNs2wr8IeHXa89oRgSOSwCMR8XRE\nrG6xjikLMnN3Nf06sKDNYoBrI2JT9bVgJF9BpouIJcAyOke5VvfNQbVAC/tmkBG2B/OEH5yfmecA\nvwFcExGfaLugKdn5HNdmd8ztwGl0fsRlN3DLKDceEccC9wPXZeZb09tGvW9mqKWVfZMDjLA9WBvh\n3wUsnva81RGBmbmretwLPEhnh7ZpT0QsBKge97ZVSGbuqf6xvQfcwQj3TUTMoRO2ezLzgWp2K/tm\nplra3DfV9gceYdtG+J8CTq/OUB4FXA6sa6EOIuKYiDhuahq4CNjcfa2hW0dnlCS0PFpyKmiVSxnR\nvomIAO4EtmTmrdOaRr5v6mppY980PsJ2lGcrp521XEnnrOk24M/bqKGq41Q6vQ3PAS+MuhbgXjof\nGd+l813tKmAesAF4GfhXYG6LtXwHeB7YRCd4C0dUy/l0PtJvAp6t/la2sW+61DLyfQOcRWcE7SY6\nbzZ/Oe3f8UZgK/AD4OheXs8r/KRCecJPKpThlwpl+KVCGX6pUIZfKpThlwp
l+KVCGX6pUP8HziGw\nVyOGQaIAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADLJJREFUeJzt3X/IXYV9x/H3t0+jHRqoiVlIY7ZU\nJxul0yjPQkdD6dpZUylExygKKxnIUroJCh1bsLC6/WXLVEbZ3OIMDcXZuqkzMLc0FUEcRX3iYoxm\nmz8WMTEmMWnR/TFrHr/7457A0/Dc57k+99x7knzfL3i4555z7j0fDvncc8+v3MhMJNXzoa4DSOqG\n5ZeKsvxSUZZfKsryS0VZfqkoyy8VZfmloiy/VNSHh3lxRKwH/gqYAP4+M2+fa/4Ll0zk6lWLhlmk\npDnsf/093jo+HYPMu+DyR8QE8NfAVcAB4JmI2J6ZL/Z7zepVi3h6x6qFLlLSPNZe/frA8w7ztX8t\n8HJmvpqZPwO+D2wY4v0kjdEw5V8JzPyYOdCMk3QGGPkBv4jYFBFTETF19Nj0qBcnaUDDlP8gMHMH\n/qJm3M/JzC2ZOZmZk8uWTgyxOEltGqb8zwCXRsTHI+Ic4HpgezuxJI3ago/2Z+aJiLgJ2EHvVN/W\nzHyhtWSSRmqo8/yZ+SjwaEtZJI2RV/hJRVl+qSjLLxVl+aWiLL9UlOWXirL8UlGWXyrK8ktFWX6p\nKMsvFWX5paIsv1SU5ZeKsvxSUZZfKsryS0VZfqkoyy8VZfmloiy/VJTll4qy/FJRll8qyvJLRVl+\nqaihfq4rIvYD7wDTwInMnGwjlKTRG6r8jd/KzLdaeB9JY+TXfqmoYcufwA8jYldEbGojkKTxGPZr\n/7rMPBgRvwjsjIj/zMwnZs7QfChsAvillW3sZUhqw1Bb/sw82DweAR4G1s4yz5bMnMzMyWVLJ4ZZ\nnKQWLbj8EXFeRCw+OQx8AdjbVjBJozXM9/DlwMMRcfJ9/iEz/62VVJJGbsHlz8xXgctbzCJpjDzV\nJxVl+aWiLL9UlOWXirL8UlFn9SV3k9/8Wt9py3/0Rt9p//Lvj4wijnRaccsvFWX5paIsv1SU5ZeK\nsvxSUZZfKuqsPtX3kePv95124n9e6zvt6o+tGUUcnSZ2vLG76winBbf8UlGWXyrK8ktFWX6pKMsv\nFWX5paLO6lN9T37n7/pP/E7/Sev2/E77YdS6o7uW9522+hs/HmOSM5Nbfqkoyy8VZfmloiy/VJTl\nl4qy/FJR857qi4itwJeAI5n5yWbcEuAHwGpgP/DlzPzJ6GKO15OXPdR1BA1gHZ6SHcYgW/7vAutP\nGbcZeCwzLwUea55LOoPMW/7MfAI4fsroDcC2ZngbcG3LuSSN2EL3+Zdn5qFm+E16P9ct6Qwy9AG/\nzEwg+02PiE0RMRURU0ePTQ+7OEktWWj5D0fECoDm8Ui/GTNzS2ZOZubksqUTC1ycpLYttPzbgY3N\n8EbA37eSzjDzlj8i7gd+DPxqRByIiBuB24GrIuIl4Leb55LOIPOe58/MG/pM+nzLWSSNkVf4SUVZ\nfqkoyy8VZfmloiy/VJTll4qy/FJRll8qyvJLRVl+qSjLLxVl+aWizurf6tPZ7bz1r3Yd4Yzmll8q\nyvJLRVl+qSjLLxVl+aWiLL9UlOWXirL8UlGWXyrK8ktFWX6pKMsvFWX5paLmvasvIrYCXwKOZOYn\nm3G3AX8AHG1muzUzHx1VSOmD2vHG7q4jnPYG2fJ/F1g/y/i7MnNN82fxpTP
MvOXPzCeA42PIImmM\nhtnnvyki9kTE1oi4oLVEksZioeW/G7gEWAMcAu7oN2NEbIqIqYiYOnpseoGLk9S2BZU/Mw9n5nRm\nvg/cA6ydY94tmTmZmZPLlk4sNKekli2o/BGxYsbT64C97cSRNC6DnOq7H/gscGFEHAC+CXw2ItYA\nCewHvjrCjJJGYN7yZ+YNs4y+dwRZJI2RV/hJRVl+qSjLLxVl+aWiLL9UlOWXiir7Q51Xf2xN1xHU\nePeLv7Gg153LMy0nqcUtv1SU5ZeKsvxSUZZfKsryS0VZfqmosqf6WPvrXSco5UP/917faef+q6fs\nuuCWXyrK8ktFWX6pKMsvFWX5paIsv1RU2VN9O/75e11H0JAu//YfzjHVH+qcj1t+qSjLLxVl+aWi\nLL9UlOWXipq3/BGxKiIej4gXI+KFiLi5Gb8kInZGxEvN4wWjjyupLYOc6jsBfD0zn42IxcCuiNgJ\n/D7wWGbeHhGbgc3An44uqvTznvuTv+k6whlt3i1/Zh7KzGeb4XeAfcBKYAOwrZltG3DtqEJKat8H\n2uePiNXAFcBTwPLMPNRMehNY3moySSM1cPkj4nzgQeCWzHx75rTMTCD7vG5TRExFxNTRY9NDhZXU\nnoHKHxGL6BX/vsx8qBl9OCJWNNNXAEdme21mbsnMycycXLZ0oo3MklowyNH+AO4F9mXmnTMmbQc2\nNsMbgUfajydpVAY52v9p4CvA8xFx8m6JW4HbgQci4kbgNeDLo4koaRTmLX9mPglEn8mfbzeOpHHx\nCj+pKMsvFWX5paIsv1SU5ZeKsvxSUZZfKsryS0VZfqkoyy8VZfmloiy/VJTll4qy/FJRll8qyvJL\nRVl+qSjLLxVl+aWiLL9UlOWXirL8UlGWXyrK8ktFWX6pKMsvFWX5paIG+ZXeVRHxeES8GBEvRMTN\nzfjbIuJgROxu/q4ZfVxJbRnkV3pPAF/PzGcjYjGwKyJ2NtPuysy/HF08SaMyyK/0HgIONcPvRMQ+\nYOWog0karQ+0zx8Rq4ErgKeaUTdFxJ6I2BoRF/R5zaaImIqIqaPHpocKK6k9A5c/Is4HHgRuycy3\ngbuBS4A19L4Z3DHb6zJzS2ZOZubksqUTLUSW1IaByh8Ri+gV/77MfAggMw9n5nRmvg/cA6wdXUxJ\nbRvkaH8A9wL7MvPOGeNXzJjtOmBv+/EkjcogR/s/DXwFeD4idjfjbgVuiIg1QAL7ga+OJKGkkRjk\naP+TQMwy6dH240gaF6/wk4qy/FJRll8qyvJLRVl+qSjLLxVl+aWiLL9UlOWXirL8UlGWXyrK8ktF\nWX6pKMsvFWX5paIsv1SU5ZeKsvxSUZZfKsryS0VZfqkoyy8VZfmloiy/VJTll4qy/FJRg/xQ50ci\n4umIeC4iXoiIP2/GfzwinoqIlyPiBxFxzujjSmrLIFv+d4HPZeblwBpgfUR8CvgWcFdm/grwE+DG\n0cWU1LZ5y589/9s8XdT8JfA54J+a8duAa0eSUNJIDLTPHxETzc9zHwF2Aq8AP83ME80sB4CVfV67\nKSKmImLq6LHpNjJLasFA5c/M6cxcA1wErAV+bdAFZOaWzJzMzMllSycWGFNS2z7Q0f7M/CnwOPCb\nwEcj4sPNpIuAgy1nkzRCgxztXxYRH22GfwG4CthH70Pgd5vZNgKPjCqkpPZFZs49Q8Rl9A7oTdD7\nsHggM/8iIi4Gvg8sAf4D+L3MfHee9zoKvNY8vRB4a7j4rTqd8phldmaZ3cwsv5yZywZ50bzlH5WI\nmMrMyU4WPovTKY9ZZmeW2S00i1f4SUVZfqmoLsu/pcNlz+Z0ymOW2ZlldgvK0tk+v6Ru+bVfKqqT\n8kfE+oj4r+aOwM1dZJiRZX9EPB8RuyNiaszL3hoRRyJi74xxSyJiZ0S81Dxe0GGW2yLiYLNudkfE\nNWPKsioiHo+IF5s7SW9uxo993cyRZez
rpvU7bDNzrH/0rhd4BbgYOAd4DvjEuHPMyLMfuLCjZX8G\nuBLYO2Pct4HNzfBm4FsdZrkN+OMO1ssK4MpmeDHw38Anulg3c2QZ+7oBAji/GV4EPAV8CngAuL4Z\n/7fA1wZ5vy62/GuBlzPz1cz8Gb0LhTZ0kKNzmfkEcPyU0RvoXVQFY7xbsk+WTmTmocx8thl+h94V\npSvpYN3MkWXssqe1O2y7KP9K4PUZz/veETgmCfwwInZFxKYOc5y0PDMPNcNvAsu7DAPcFBF7mt2C\nseyCzBQRq4Er6G3lOl03p2SBDtbNMHfYnsoDfrAuM68Evgj8UUR8putAJ2Xve1yXp2PuBi6h95+4\nHALuGOfCI+J84EHglsx8e+a0ca+bWbJ0sm5yiDtsT9VF+Q8Cq2Y87/SOwMw82DweAR6mt0K7dDgi\nVgA0j0e6CpKZh5t/bO8D9zDGdRMRi+iV7b7MfKgZ3cm6mS1Ll+umWf7Qd9h2Uf5ngEubI5TnANcD\n2zvIQUScFxGLTw4DXwD2zv2qkdtO7y5J6PhuyZNFa1zHmNZNRARwL7AvM++cMWns66Zfli7WTet3\n2I7zaOWMo5bX0Dtq+grwjS4yNDkupne24TnghXFnAe6n95XxPXr7ajcCS4HHgJeAHwFLOszyPeB5\nYA+94q0YU5Z19L7S7wF2N3/XdLFu5sgy9nUDXEbvDto99D5s/mzGv+OngZeBfwTOHeT9vMJPKsoD\nflJRll8qyvJLRVl+qSjLLxVl+aWiLL9UlOWXivp/Yqs8zOQZRQcAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADF5JREFUeJzt3X/oXfV9x/Hna1/jHFWoiVnIYtao\nkxUZNrovwaGUrp2tk0IURlFY8Q8hZdSh0P0hHWyuf9kxlf2xOeKUyrBWNxUDk1kngrQM9auLMTHb\n/EFEs5jE2KL7Y239+t4f9wS+C98f1+89917j5/mAy/fcc86958Uhr3vu+ZWbqkJSe35p2gEkTYfl\nlxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfatQpo7w4yRXAXwMzwN9X1a3LzX/W2pnasnnNKIuU\ntIwDb/6Cd96dzzDzrrr8SWaAvwEuB94Cnkuyq6peXuo1Wzav4dnHN692kZJWsO0rbw497yhf+7cB\nr1bV61X1c+AHwPYR3k/SBI1S/k3Awo+Zt7pxkk4CYz/gl2RHkrkkc0ePzY97cZKGNEr5DwILd+DP\n7sb9P1W1s6pmq2p2/bqZERYnqU+jlP854Pwk5yQ5FbgG2NVPLEnjtuqj/VX1QZIbgMcZnOq7p6r2\n9ZZM0liNdJ6/qh4DHuspi6QJ8go/qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU\n5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGW\nX2rUSD/XleQA8D4wD3xQVbN9hJI0fiOVv/O7VfVOD+8jaYL82i81atTyF/DDJM8n2dFHIEmTMerX\n/suq6mCSXwWeSPIfVfX0whm6D4UdAL++qY+9DEl9GGnLX1UHu79HgEeAbYvMs7OqZqtqdv26mVEW\nJ6lHqy5/kk8lOeP4MPBlYG9fwSSN1yjfwzcAjyQ5/j7fr6p/6SWVpLFbdfmr6nXgc
z1mkTRBnuqT\nGmX5pUZZfqlRll9qlOWXGuUld/pY+8qvbV1y2uP/vXuCST553PJLjbL8UqMsv9Qoyy81yvJLjbL8\nUqM81beI5U4vSZ8UbvmlRll+qVGWX2qU5ZcaZfmlRll+qVGe6lvEV/f9ZMlpD7z52xNMcnL5289+\nf8lpF5562qre09Ou4+OWX2qU5ZcaZfmlRll+qVGWX2qU5ZcateKpviT3AF8FjlTVb3Xj1gIPAFuA\nA8DXqmrp82MnmT8+841VTdPqTudpOobZ8n8PuOKEcTcDT1bV+cCT3XNJJ5EVy19VTwPvnjB6O3Bv\nN3wvcFXPuSSN2Wr3+TdU1aFu+G0GP9ct6SQy8gG/qiqglpqeZEeSuSRzR4/Nj7o4ST1ZbfkPJ9kI\n0P09stSMVbWzqmaranb9uplVLk5S31Zb/l3Add3wdcCj/cSRNCkrlj/J/cC/Ab+Z5K0k1wO3Apcn\neQX4ve65pJPIiuf5q+raJSZ9qecskibIK/ykRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfml\nRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5ZcaZfmlRll+qVGWX2qU5Zca\nZfmlRll+qVHD/FbfPUmOJNm7YNwtSQ4m2d09rhxvTEl9G2bL/z3gikXG31FVW7vHY/3GkjRuK5a/\nqp4G3p1AFkkTNMo+/w1J9nS7BWf2lkjSRKy2/HcC5wFbgUPAbUvNmGRHkrkkc0ePza9ycZL6tqry\nV9Xhqpqvqg+Bu4Bty8y7s6pmq2p2/bqZ1eaU1LNVlT/JxgVPrwb2LjWvpI+nU1aaIcn9wBeAs5K8\nBfw58IUkW4ECDgDfGGNGSWOwYvmr6tpFRt89hiySJsgr/KRGWX6pUZZfapTllxpl+aVGWX6pUZZf\napTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6pUZZfapTllxpl+aVGWX6p\nUZZfapTllxpl+aVGWX6pUSuWP8nmJE8leTnJviQ3duPXJnkiySvd3zPHH1dSX4bZ8n8AfKuqLgAu\nAb6Z5ALgZuDJqjofeLJ7LukksWL5q+pQVb3QDb8P7Ac2AduBe7vZ7gWuGldISf37SPv8SbYAFwHP\nABuq6lA36W1gQ6/JJI3V0OVPcjrwEHBTVb23cFpVFVBLvG5Hkrkkc0ePzY8UVlJ/hip/kjUMin9f\nVT3cjT6cZGM3fSNwZLHXVtXOqpqtqtn162b6yCypB8Mc7Q9wN7C/qm5fMGkXcF03fB3waP/xJI3L\nKUPMcynwdeClJLu7cd8GbgUeTHI98AbwtfFElDQOK5a/qn4EZInJX+o3jqRJ8Qo/qVGWX2qU5Zca\nZfmlRll+qVGWX2rUMOf5P5F+/L8fLjntO+dePMEk0nS45ZcaZfmlRll+qVGWX2qU5ZcaZfmlRjV7\nqu/S05b+3DvlnM9MMIlWb/fKs2hJbvmlRll+qVGWX2qU5ZcaZfmlRll+qVHNnupbzj//2P+FXJ98\nbvmlRll+qVGWX2qU5ZcaZfmlRll+qVHD/Erv5iRPJXk5yb4kN3bjb0lyMMnu7nHl+ONK6ssw5/k/\nAL5VVS8kOQN4PskT3bQ7quqvxhdP0rgM8yu9h4BD3fD7SfYDm8YdTNJ4faR9/iRbgIuAZ7pRNyTZ\nk+SeJGcu8ZodSeaSzB09Nj9SWEn9Gbr8SU4HHgJuqqr3gDuB84CtDL4Z3LbY66pqZ1XNVtXs+nUz\nPUSW1Iehyp9kDYPi31dVDwNU1eGqmq+qD4G7gG3jiympb8Mc7Q9wN7C/qm5fMH7jgtmuBvb2H0/S\nuAxztP9S4OvAS0mO/4+J3wauTbIVKOAA8I2xJJQ0FsMc7f8RkEUmPdZ/HEmT4hV+UqMsv9Qoyy81\nyvJLjbL8UqMsv9Qoyy81yvJLjbL8UqMsv9Qoy
y81yvJLjbL8UqMsv9Qoyy81yvJLjbL8UqMsv9Qo\nyy81yvJLjbL8UqMsv9Qoyy81yvJLjbL8UqMsv9SoYX6o87QkzyZ5Mcm+JH/RjT8nyTNJXk3yQJJT\nxx9XUl+G2fL/DPhiVX0O2ApckeQS4LvAHVX1G8BPgOvHF1NS31Ysfw38T/d0Tfco4IvAP3Xj7wWu\nGktCSWMx1D5/kpnu57mPAE8ArwE/raoPulneAjYt8dodSeaSzB09Nt9HZkk9GKr8VTVfVVuBs4Ft\nwGeHXUBV7ayq2aqaXb9uZpUxJfXtIx3tr6qfAk8BvwN8Oskp3aSzgYM9Z5M0RsMc7V+f5NPd8K8A\nlwP7GXwI/EE323XAo+MKKal/qarlZ0guZHBAb4bBh8WDVfWdJOcCPwDWAv8O/GFV/WyF9zoKvNE9\nPQt4Z7T4vfo45THL4syyuIVZPlNV64d50YrlH5ckc1U1O5WFL+LjlMcsizPL4labxSv8pEZZfqlR\n0yz/zikuezEfpzxmWZxZFreqLFPb55c0XX7tlxo1lfInuSLJf3Z3BN48jQwLshxI8lKS3UnmJrzs\ne5IcSbJ3wbi1SZ5I8kr398wpZrklycFu3exOcuWEsmxO8lSSl7s7SW/sxk983SyTZeLrpvc7bKtq\nog8G1wu8BpwLnAq8CFww6RwL8hwAzprSsj8PXAzsXTDuL4Gbu+Gbge9OMcstwJ9MYb1sBC7uhs8A\n/gu4YBrrZpksE183QIDTu+E1wDPAJcCDwDXd+L8D/miY95vGln8b8GpVvV5VP2dwodD2KeSYuqp6\nGnj3hNHbGVxUBRO8W3KJLFNRVYeq6oVu+H0GV5RuYgrrZpksE1cDvd1hO43ybwLeXPB8yTsCJ6SA\nHyZ5PsmOKeY4bkNVHeqG3wY2TDMMcEOSPd1uwUR2QRZKsgW4iMFWbqrr5oQsMIV1M8odtifygB9c\nVlUXA78PfDPJ56cd6LgafI+b5umYO4HzGPwnLoeA2ya58CSnAw8BN1XVewunTXrdLJJlKuumRrjD\n9kTTKP9BYPOC51O9I7CqDnZ/jwCPMFih03Q4yUaA7u+RaQWpqsPdP7YPgbuY4LpJsoZB2e6rqoe7\n0VNZN4tlmea66ZY/8h220yj/c8D53RHKU4FrgF1TyEGSTyU54/gw8GVg7/KvGrtdDO6ShCnfLXm8\naJ2rmdC6SRLgbmB/Vd2+YNLE181SWaaxbnq/w3aSRysXHLW8ksFR09eAP51Ghi7HuQzONrwI7Jt0\nFuB+Bl8Zf8FgX+16YB3wJPAK8K/A2ilm+QfgJWAPg+JtnFCWyxh8pd8D7O4eV05j3SyTZeLrBriQ\nwR20exh82PzZgn/HzwKvAv8I/PIw7+cVflKjPOAnNcryS42y/FKjLL/UKMsvNcryS42y/FKjLL/U\nqP8DsIgi3QqylHQAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "params={}\n", "params['nbclasses'] = 3\n", "params['imagesize'] = 31\n", "params['prestime'] = 3\n", "params['interpresdelay'] = 2\n", "params['prestimetest'] = 2\n", "params['nbshots'] = 1\n", "\n", "params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] \n", "inputT = np.zeros((params['nbsteps'], 1, params['imagesize'], params['imagesize'])) 
#inputTensor, initially in numpy format...\n", "labelT = np.zeros((params['nbsteps'], 1, params['nbclasses'])) #labelTensor, initially in numpy format...\n", "\n", "patterns=[]\n", "cats = np.random.permutation(np.arange(len(imagedata)))[:params['nbclasses']] # Which categories to use for this episode?\n", "testcat = random.choice(cats)\n", "\n", "# Inserting the character images and labels in the input tensor at the proper places\n", "location = 0\n", "for nc in range(params['nbshots']):\n", " np.random.shuffle(cats)\n", " for ii, catnum in enumerate(cats):\n", " p = random.choice(imagedata[catnum])\n", " for nr in range(rots[catnum]):\n", " p = np.rot90(p)\n", " p = skimage.transform.resize(p, (31, 31))\n", " for nn in range(params['prestime']):\n", " #numi =nc * (params['nbpatterns'] * (params['prestime']+params['interpresdelay'])) + ii * (params['prestime']+params['interpresdelay']) + nn\n", "\n", " inputT[location][0][:][:] = p[:][:]\n", " #labelT[location][0][cats.index(\n", " location += 1\n", " location += params['interpresdelay']\n", "\n", "p = random.choice(imagedata[testcat])\n", "for nr in range(rots[testcat]):\n", " p = np.rot90(p)\n", "p = skimage.transform.resize(p, (31, 31))\n", "for nn in range(params['prestimetest']):\n", " inputT[location][0][:][:] = p[:][:]\n", " location += 1\n", "\n", "print(location, params['nbsteps'], cats, rots[cats])\n", "\n", "plt.figure()\n", "for x in (1, 1+4, 1+9, 1+14):\n", " plt.figure()\n", " plt.imshow(inputT[x][0])\n", "plt.figure()\n", "plt.imshow(inputT[-1][0])\n" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADkJJREFUeJzt3W2MXOV5h/HrzsZAGlDBxrFc49ZA\n3CY0CoZu3ZAgQkABx40KNBUCtRGRnBgRQKCkUi2iNrT0A2kL9C3QmtqNFRESWkD4gym4iNZCRYaF\nGOOXJrzECIzxGhME/VASL3c/zLG0tfbMDjNnZtZ+rp+0mjPnOS+3jv2fM3Oe88xEZiKpPO8bdgGS\nhsPwS4Uy/FKhDL9UKMMvFcrwS4Uy/FKhDL9UKMMvFer9vawcEcuAvwFGgH/KzJvbLX/i7JFctHBW\nL7uU1Maul3/O629MRCfLdh3+iBgBvg18FngFeDIi1mfmjrp1Fi2cxRMPLex2l5KmsfTClztetpe3\n/UuB5zPzxcz8GfB94KIetidpgHoJ/wJg8svMK9U8SYeBvl/wi4iVETEWEWP79k/0e3eSOtRL+HcD\nkz/An1TN+38yc3Vmjmbm6Nw5Iz3sTlKTegn/k8DiiDg5Io4CLgPWN1OWpH7r+mp/Zh6IiGuAh2h1\n9a3NzO2NVSapr3rq58/MDcCGhmqRNEDe4ScVyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8V\nyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qo\nwy8VyvBLherp57oiYhfwNjABHMjM0SaKktR/PYW/8pnMfL2B7UgaIN/2S4XqNfwJPBwRT0XEyiYK\nkjQYvb7tPzszd0fEh4CNEfHfmblp8gLVi8JKgF9e0MSnDElN6OnMn5m7q8dx4H5g6RTLrM7M0cwc\nnTtnpJfdSWpQ1+GPiA9GxHEHp4ELgG1NFSapv3p5Hz4PuD8iDm7ne5n5b41UNQAX/tKSrtZ76NUt\nDVciDUfX4c/MF4HTG6xF0gDZ1ScVyvBLhTL8UqEMv1Qowy8VylvudNha/qPltW0Tn3m1q22OHP+L\ntW0bdvxnV9ucqTzzS4Uy/FKhDL9UKMMvFcrwS4Uy/FKh7OobkHO//JXatqM3PNn4/t753G/Wtv3H\nmjsb31+/LD/t07VtE2/Wd+cdOP83atv+ds3f17Z9bdFZtW3tRoIejqM9PfNLhTL8UqEMv1Qowy8V\nyvBLhTL8UqGK7epb8eOf1Lat+dWTa9u6/eLPo6nvzut2JNnyT/9u/f4ebL77EOCyn5xX2/bqX364\ntm3T7atr23798d+vbTvpze21bftX1HfLjd10R20bfKBNWzk880uFMvxSoQy/VCjDLxXK8EuFMvxS\noSIz2y8QsRb4PDCemR+r5s0GfgAsAnYBl2bmT6fb2ejpx+QTDy3sseThWn7GBY1vc8MPH258m912\nSf71rv9q2379ok92td12o95m0u8mnnLvlbVti6/dPNBaurH0wpcZe+Z/o5NlOznzfwdYdsi8VcAj\nmbkYeKR6LukwMm34M3MT8MYhsy8C1lXT64CLG65LUp91+5l/XmbuqaZfo/Vz3ZIOIz1f8MvWRYPa\nCwcRsTIixiJibN/+iV53J6kh3YZ/b0TMB6gex+sWzMzVmTmamaNz54x0uTtJTes2/OuBK6rpK4AH\nmilH0qBMO6ovIu4GzgVOjIhXgG8CNwP3RMQK4CXg0n4WOZP0o1uuH8a/Wt8l96Hb67vzuu3Km86v\n/fNVtW2LeLy2rX3X4y/0UNHUXvzCP9a2XXhtd12SM9W04c/My2uazm+4FkkD5B1+UqEMv1Qowy8V\nyvBLhTL8UqGK/QLPI1277rx2fmfH/rbtVx//cm1bu9F5i75R3
53XzkePar47Ty2e+aVCGX6pUIZf\nKpThlwpl+KVCGX6pUHb1Hcb+/PWPdLXe+07/aG3b1cff3W05M+ZLLNUZz/xSoQy/VCjDLxXK8EuF\nMvxSoQy/VCi7+ma487705dq2WQ+PdbXNBx/svjtPRw7P/FKhDL9UKMMvFcrwS4Uy/FKhDL9UqE5+\nq28t8HlgPDM/Vs27EfgKsK9a7IbM3NCvIget3RdRDtosuuvOc4SdptPJmf87wLIp5t+WmUuqvyMm\n+FIppg1/Zm4C3hhALZIGqJfP/NdExNaIWBsRJzRWkaSB6Db8dwCnAkuAPcAtdQtGxMqIGIuIsX37\nJ7rcnaSmdRX+zNybmROZ+S5wJ7C0zbKrM3M0M0fnzhnptk5JDesq/BExf9LTS4BtzZQjaVA66eq7\nGzgXODEiXgG+CZwbEUuABHYBV/axRkl9MG34M/PyKWav6UMtA9VtX/74Vz9Z23agD78p+ezXbm9+\noxLe4ScVy/BLhTL8UqEMv1Qowy8VyvBLhfLbe6fw3N/9Vm3bi1+w6+1Idsq99besLGbzACvpP8/8\nUqEMv1Qowy8VyvBLhTL8UqEMv1Qou/o0o7Ubfbl/xVm1bWM33dHV/hZfe2R157XjmV8qlOGXCmX4\npUIZfqlQhl8qlOGXCmVXn4rT7Ze33v7SY21aj+2umCHyzC8VyvBLhTL8UqEMv1Qowy8VatrwR8TC\niHg0InZExPaIuK6aPzsiNkbEc9XjCf0vV1JTOunqOwB8PTOfjojjgKciYiPwJeCRzLw5IlYBq4A/\n6l+pg9NuZNem365f75xj+lCMas1Z83ht24VruuvO23VT/UjBU2dt6WqbM9W0Z/7M3JOZT1fTbwM7\ngQXARcC6arF1wMX9KlJS897TZ/6IWAScAWwG5mXmnqrpNWBeo5VJ6quOwx8RxwL3Atdn5luT2zIz\ngaxZb2VEjEXE2L79Ez0VK6k5HYU/ImbRCv5dmXlfNXtvRMyv2ucD41Otm5mrM3M0M0fnzhlpomZJ\nDejkan8Aa4CdmXnrpKb1wBXV9BXAA82XJ6lfOrna/yngi8CzEXHwcucNwM3APRGxAngJuLQ/JUrq\nh2nDn5mPAVHTfH6z5QzOQ68eWd02R6p2/06jf3xVV9ts/+We5fy/8A4/qVCGXyqU4ZcKZfilQhl+\nqVCGXyqUX+Cpw1a3v8enFs/8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8V\nyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLherkV3oXRsSjEbEjIrZHxHXV/BsjYndE\nbKn+lve/XElN6eTbew8AX8/MpyPiOOCpiNhYtd2WmX/Vv/Ik9Usnv9K7B9hTTb8dETuBBf0uTFJ/\nvafP/BGxCDgD2FzNuiYitkbE2og4oWadlRExFhFj+/ZP9FSspOZ0HP6IOBa4F7g+M98C7gBOBZbQ\nemdwy1TrZebqzBzNzNG5c0YaKFlSEzoKf0TMohX8uzLzPoDM3JuZE5n5LnAnsLR/ZUpqWidX+wNY\nA+zMzFsnzZ8/abFLgG3NlyepXzq52v8p4IvAsxGxpZp3A3B5RCwBEtgFXNmXCiX1RSdX+x8DYoqm\nDc2XI2lQvMNPKpThlwpl+KVCGX6pUIZfKpThlwpl+KVCGX6pUIZfKpThlwpl+KVCGX6pUIZfKpTh\nlwpl+KVCGX6pUIZfKpThlwpl+KVCGX6pUIZfKpThlwpl+KVCGX6pUIZfKpThlwrVyQ91HhMRT0TE\nMxGxPSL+tJp/ckRsjojnI+IHEXFU/8uV1JROzvzvAOdl5unAEmBZRHwC+BZwW2Z+GPgpsKJ/ZUpq\n2rThz5b/qZ7Oqv4SOA/412r+OuDivlQoqS86+swfESPVz3OPAxuBF4A3M/NAtcgrwIKadVdGxFhE\njO3bP9FEzZIa0FH4M3MiM
5cAJwFLgY90uoPMXJ2Zo5k5OnfOSJdlSmrae7ran5lvAo8CZwHHR8T7\nq6aTgN0N1yapjzq52j83Io6vpj8AfBbYSetF4Peqxa4AHuhXkZKaF5nZfoGIj9O6oDdC68Xinsz8\ns4g4Bfg+MBv4IfAHmfnONNvaB7xUPT0ReL238hs1k+qxlqlZy9Qm1/IrmTm3k5WmDX+/RMRYZo4O\nZedTmEn1WMvUrGVq3dbiHX5SoQy/VKhhhn/1EPc9lZlUj7VMzVqm1lUtQ/vML2m4fNsvFWoo4Y+I\nZRHxo2pE4Kph1DCpll0R8WxEbImIsQHve21EjEfEtknzZkfExoh4rno8YYi13BgRu6tjsyUilg+o\nloUR8WhE7KhGkl5XzR/4sWlTy8CPTeMjbDNzoH+07hd4ATgFOAp4Bjht0HVMqmcXcOKQ9n0OcCaw\nbdK8vwBWVdOrgG8NsZYbgT8cwnGZD5xZTR8H/Bg4bRjHpk0tAz82QADHVtOzgM3AJ4B7gMuq+f8A\nXNXJ9oZx5l8KPJ+ZL2bmz2jdKHTREOoYuszcBLxxyOyLaN1UBQMcLVlTy1Bk5p7MfLqafpvWHaUL\nGMKxaVPLwGVLYyNshxH+BcDLk57XjggckAQejoinImLlEOs4aF5m7qmmXwPmDbMY4JqI2Fp9LBjI\nR5DJImIRcAats9xQj80htcAQjk0vI2wP5QU/ODszzwQ+B1wdEecMu6CDsvU+bpjdMXcAp9L6Epc9\nwC2D3HlEHAvcC1yfmW9Nbhv0sZmilqEcm+xhhO2hhhH+3cDCSc+HOiIwM3dXj+PA/bQO6DDtjYj5\nANXj+LAKycy91X+2d4E7GeCxiYhZtMJ2V2beV80eyrGZqpZhHptq/z2PsB1G+J8EFldXKI8CLgPW\nD6EOIuKDEXHcwWngAmBb+7X6bj2tUZIw5NGSB4NWuYQBHZuICGANsDMzb53UNPBjU1fLMI5N4yNs\nB3m1ctJVy+W0rpq+AHxjGDVUdZxCq7fhGWD7oGsB7qb1lvHntD6rrQDmAI8AzwH/DsweYi3fBZ4F\nttIK3vwB1XI2rbf0W4Et1d/yYRybNrUM/NgAH6c1gnYrrRebP5n0//gJ4HngX4CjO9med/hJhfKC\nn1Qowy8VyvBLhTL8UqEMv1Qowy8VyvBLhTL8UqH+D3ePpxaoEdRyAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "z = np.zeros((1, 31, 31))\n", "z[0][:][:] = patterns[1][:][:]\n", "plt.imshow(z[0])" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "module 'numpy' has no attribute 'find'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mcats\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcats\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;36m414\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m: module 'numpy' has no attribute 'find'" ] } ], "source": [ "cats\n", "np.find(cats==414)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: omniglot/README.md ================================================ # Omniglot experiment This code performs the Omniglot task (fast learning of image-label mappings). To run this code, you must download [the Python version of the Omniglot dataset](https://github.com/brendenlake/omniglot), and move the `omniglot-master` directory inside this directory. You will also need the scikit-image library (in addition to PyTorch). To reproduce the results shown in the paper: ``` python3 omniglot.py --nbclasses 5 --nbiter 5000000 --rule oja --activ tanh --steplr 1000000 --prestime 1 --prestimetest 1 --gamma .666 --alpha free --lr 3e-5 ``` ================================================ FILE: omniglot/omniglot.py ================================================ # Differentiable plasticity: Omniglot task. # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # You MUST download the Python version of the Omniglot dataset # (https://github.com/brendenlake/omniglot), and move the 'omniglot-master' # directory inside this directory. # To get the results shown in the paper: # python3 omniglot.py --nbclasses 5 --nbiter 5000000 --rule oja --activ tanh --steplr 1000000 --prestime 1 --gamma .666 --alpha free --lr 3e-5 # Alternative (using a shared, though still learned alpha across all connections): # python3 omniglot.py --nbclasses 5 --nbiter 5000000 --activ tanh --steplr 1000000 --prestime 1 --gamma 0.3 --lr 1e-4 --alpha yoked # Note that this code uses click rather than argparse for command-line # parameter handling. I won't do that again. import pdb import torch import torch.nn as nn from torch.autograd import Variable import click import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import pdb import time import skimage from skimage import transform from skimage import io import os import platform import numpy as np import glob np.set_printoptions(precision=4) defaultParams = { 'activ': 'tanh', # 'tanh' or 'selu' #'plastsize': 200, 'rule': 'hebb', # 'hebb' or 'oja' 'alpha': 'free', # 'free' of 'yoked' (if the latter, alpha is a single scalar learned parameter, shared across all connection) 'steplr': 1e6, # How often should we change the learning rate? 
'nbclasses': 5,           # Number of image categories (and thus labels) per episode
    'gamma': .666,        # The annealing factor of learning rate decay for Adam (applied every 'steplr' steps)
    'flare': 0,           # Whether or not the ConvNet has more features in higher channels
    'nbshots': 1,         # Number of 'shots' in the few-shots learning
    'prestime': 1,        # Time steps for which each labelled training image is shown
    'nbf' : 64,           # Number of features. 128 is better (unsurprisingly) but we keep 64 for fair comparison with other reports
    'prestimetest': 1,    # Time steps for which the unlabelled test image is shown
    'ipd': 0,             # Inter-presentation delay (blank time steps between presentations)
    'imgsize': 31,        # Side length of the (square) input images
    'nbiter': 5000000,    # Total number of training episodes
    'lr': 3e-5,           # Initial Adam learning rate
    'test_every': 500,    # Run a test episode every this many episodes
    'save_every': 10000,  # Save results every this many episodes
    'rngseed':0           # Random-number-generator seed
}

# The last NBTESTCLASSES categories of imagedata are held out for *testing*
# episodes; training episodes draw only from the earlier categories.
NBTESTCLASSES = 100

#ttype = torch.FloatTensor;
ttype = torch.cuda.FloatTensor;  # Tensor type used throughout; requires a CUDA device


# Generate the full list of inputs, labels, and the target label for an episode
def generateInputsLabelsAndTarget(params, imagedata, test=False):
    """Build one episode's input sequence, label sequence, and target label.

    Arguments:
        params: hyperparameter dict; reads 'nbsteps', 'imgsize', 'nbclasses',
            'nbshots', 'prestime', 'prestimetest' and 'ipd'.
        imagedata: sequence of categories, each a sequence of 2D character
            images (indexed as imagedata[category][instance]).
        test: if True, pick categories from the held-out last NBTESTCLASSES
            categories of imagedata; otherwise from the training categories.

    Returns:
        (inputT, labelT, targetL) as ttype tensors of shapes
        (nbsteps, 1, 1, imgsize, imgsize), (nbsteps, 1, nbclasses) and
        (nbclasses,) respectively. targetL is the one-hot label of the
        final unlabelled test presentation.
    """
    #print(("Input Boost:", params['inputboost']))
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['ipd']) * params['nbclasses']) + params['prestimetest']
    inputT = np.zeros((params['nbsteps'], 1, 1, params['imgsize'], params['imgsize'])) #inputTensor, initially in numpy format... Note dimensions: number of steps x batchsize (always 1) x NbChannels (also 1) x h x w
    labelT = np.zeros((params['nbsteps'], 1, params['nbclasses'])) #labelTensor, initially in numpy format...

    patterns=[]  # NOTE(review): appears unused in this function
    if test:
        cats = np.random.permutation(np.arange(len(imagedata) - NBTESTCLASSES, len(imagedata)))[:params['nbclasses']] # Which categories to use for this *testing* episode?
    else:
        cats = np.random.permutation(np.arange(len(imagedata) - NBTESTCLASSES))[:params['nbclasses']] # Which categories to use for this *training* episode?
    #print("Test is", test, ", cats are", cats)
    #cats = np.array(range(params['nbclasses'])) + 10
    cats = np.random.permutation(cats)
    #print(cats)

    # We show one picture of each category, with labels, then one picture of one of these categories as a test, without label
    # But each of the categories may undergo rotation by 0, 90, 180 or 270deg, for augmenting the dataset
    # NOTE: We randomly assign one rotation to all the possible categories, not just the ones selected for the episode - it makes the coding simpler
    rots = np.random.randint(4, size=len(imagedata))  # rots[c] = number of 90-degree rotations applied to every image of category c
    #rots.fill(0)
    testcat = random.choice(cats) # select the class on which we'll test in this episode
    unpermcats = cats.copy()  # Remember the pre-shuffle order: it defines the one-hot label positions below

    # Inserting the character images and labels in the input tensor at the proper places.
    # 'location' is the current time step; it advances by prestime (+ipd) per presentation.
    location = 0
    for nc in range(params['nbshots']):
        np.random.shuffle(cats) # Presentations occur in random order
        for ii, catnum in enumerate(cats):
            #print(catnum)
            p = random.choice(imagedata[catnum])  # pick one instance of this character category
            for nr in range(rots[catnum]):
                p = np.rot90(p)
            # NOTE(review): resize target is hard-coded to 31x31 while inputT is
            # sized by params['imgsize'] - confirm imgsize is always 31.
            p = skimage.transform.resize(p, (31, 31))
            for nn in range(params['prestime']):
                #numi =nc * (params['nbclasses'] * (params['prestime']+params['ipd'])) + ii * (params['prestime']+params['ipd']) + nn
                inputT[location][0][0][:][:] = p[:][:]
                labelT[location][0][np.where(unpermcats == catnum)] = 1 # The (one-hot) label is the position of the category number in the original (unpermuted) list
                #if nn == 0:
                #    print(labelT[location][0])
                location += 1
            location += params['ipd']

    # Inserting the test character (presented without any label)
    p = random.choice(imagedata[testcat])
    for nr in range(rots[testcat]):
        p = np.rot90(p)
    p = skimage.transform.resize(p, (31, 31))
    for nn in range(params['prestimetest']):
        inputT[location][0][0][:][:] = p[:][:]
        location += 1

    # Generating the test label (one-hot, same position convention as labelT)
    testlabel = np.zeros(params['nbclasses'])
    testlabel[np.where(unpermcats == testcat)] = 1
    #print(testcat, testlabel)
    #pdb.set_trace()

    # Sanity check: we filled exactly the expected number of time steps
    assert(location == params['nbsteps'])
    inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to pytorch Tensor
    labelT = torch.from_numpy(labelT).type(ttype)
    targetL = torch.from_numpy(testlabel).type(ttype)

    return inputT, labelT, targetL


class Network(nn.Module):
    def __init__(self, params):
        super(Network, self).__init__()
        self.rule = params['rule']  # Plasticity rule name ('hebb' or 'oja' per defaultParams)
        # Embedding ConvNet: four 3x3, stride-2 convolutions.
        if params['flare'] == 1:
            # 'flare' variant: channel count grows with depth (nbf/4 -> nbf/4 -> nbf/2 -> nbf)
            self.cv1 = torch.nn.Conv2d(1, params['nbf'] //4 , 3, stride=2).cuda()
            self.cv2 = torch.nn.Conv2d(params['nbf'] //4 , params['nbf'] //4 , 3, stride=2).cuda()
            self.cv3 = torch.nn.Conv2d(params['nbf'] //4, params['nbf'] //2, 3, stride=2).cuda()
            self.cv4 = torch.nn.Conv2d(params['nbf'] //2, params['nbf'], 3, stride=2).cuda()
        else:
            # Default: constant channel count (nbf) at every layer
            self.cv1 = torch.nn.Conv2d(1, params['nbf'] , 3, stride=2).cuda()
            self.cv2 = torch.nn.Conv2d(params['nbf'] , params['nbf'] , 3, stride=2).cuda()
            self.cv3 = torch.nn.Conv2d(params['nbf'] , params['nbf'] , 3, stride=2).cuda()
            self.cv4 = torch.nn.Conv2d(params['nbf'] , params['nbf'], 3, stride=2).cuda()

        # Alternative architecture: have a separate layer of
        # plastic weights between the embedding and the output. We don't use
        # this in the paper.
#self.conv2plast = torch.nn.Linear(params['nbf'], params['plastsize']).cuda() # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions # Each *column* of w targets a single output neuron self.w = torch.nn.Parameter((.01 * torch.randn(params['nbf'], params['nbclasses'])).cuda(), requires_grad=True) #self.w = torch.nn.Parameter((.01 * torch.rand(params['plastsize'], params['nbclasses'])).cuda(), requires_grad=True) if params['alpha'] == 'free': self.alpha = torch.nn.Parameter((.01 * torch.rand(params['nbf'], params['nbclasses'])).cuda(), requires_grad=True) # Note: rand rather than randn (all positive) elif params['alpha'] == 'yoked': self.alpha = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) else : raise ValueError("Must select a value for alpha ('free' or 'yoked')") self.eta = torch.nn.Parameter((.01 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta self.params = params def forward(self, inputx, inputlabel, hebb): if self.params['activ'] == 'selu': activ = F.selu(self.cv1(inputx)) activ = F.selu(self.cv2(activ)) activ = F.selu(self.cv3(activ)) activ = F.selu(self.cv4(activ)) elif self.params['activ'] == 'relu': activ = F.relu(self.cv1(inputx)) activ = F.relu(self.cv2(activ)) activ = F.relu(self.cv3(activ)) activ = F.relu(self.cv4(activ)) elif self.params['activ'] == 'tanh': activ = F.tanh(self.cv1(inputx)) activ = F.tanh(self.cv2(activ)) activ = F.tanh(self.cv3(activ)) activ = F.tanh(self.cv4(activ)) else: raise ValueError("Parameter 'activ' is incorrect (must be tanh, relu or selu)") #activ = F.tanh(self.conv2plast(activ.view(1, self.params['nbf']))) #activin = activ.view(-1, self.params['plastsize']) activin = activ.view(-1, self.params['nbf']) if self.params['alpha'] == 'free': activ = activin.mm( self.w + torch.mul(self.alpha, hebb)) + 1000.0 * inputlabel # The expectation is that a nonzero inputlabel will overwhelm the inputs and clamp 
the outputs elif self.params['alpha'] == 'yoked': activ = activin.mm( self.w + self.alpha * hebb) + 1000.0 * inputlabel # The expectation is that a nonzero inputlabel will overwhelm the inputs and clamp the outputs activout = F.softmax( activ ) if self.rule == 'hebb': hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(activin.unsqueeze(2), activout.unsqueeze(1))[0] # bmm used to implement outer product; remember activs have a leading singleton dimension elif self.rule == 'oja': hebb = hebb + self.eta * torch.mul((activin[0].unsqueeze(1) - torch.mul(hebb , activout[0].unsqueeze(0))) , activout[0].unsqueeze(0)) # Oja's rule. Remember that yin, yout are row vectors (dim (1,N)). Also, broadcasting! else: raise ValueError("Must select one learning rule ('hebb' or 'oja')") return activout, hebb def initialZeroHebb(self): #return Variable(torch.zeros(self.params['plastsize'], self.params['nbclasses']).type(ttype)) return Variable(torch.zeros(self.params['nbf'], self.params['nbclasses']).type(ttype)) def train(paramdict=None): #params = dict(click.get_current_context().params) print("Starting training...") params = {} params.update(defaultParams) if paramdict: params.update(paramdict) print("Passed params: ", params) print(platform.uname()) sys.stdout.flush() params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['ipd']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode suffix = "W"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames print("Suffix: ", suffix, "length:", len(suffix)) # Initialize random seeds (first two redundant?) 
print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Loading Omniglot data...") imagedata = [] imagefilenames=[] for basedir in ('./omniglot-master/python/images_background/', './omniglot-master/python/images_evaluation/'): alphabetdirs = glob.glob(basedir+'*') print(alphabetdirs[:4]) for alphabetdir in alphabetdirs: chardirs = glob.glob(alphabetdir+"/*") for chardir in chardirs: chardata = [] charfiles = glob.glob(chardir+'/*') for fn in charfiles: filedata = skimage.io.imread(fn) / 255.0 #plt.imread(fn) chardata.append(filedata) imagedata.append(chardata) imagefilenames.append(fn) # imagedata is now a list of lists of numpy arrays # imagedata[CharactertNumber][FileNumber] -> numpy(105,105) np.random.shuffle(imagedata) # Randomize order of characters print(len(imagedata)) print(imagedata[1][2].shape) print("Data loaded!") print("Initializing network") net = Network(params) #net.cuda() print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") #optimizer = torch.optim.Adam([net.w, net.alpha, net.eta], lr=params['lr']) optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr']) #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, params['gamma']) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) all_losses = [] all_losses_objective = [] lossbetweensaves = 0.0 lossbetweensavesprev = 1e+10 #test_every = 20 nowtime = time.time() print("Starting episodes...") sys.stdout.flush() for numiter in range(params['nbiter']): hebb = net.initialZeroHebb() optimizer.zero_grad() is_test_step = ((numiter+1) % 
params['test_every'] == 0) inputs, labels, target = generateInputsLabelsAndTarget(params, imagedata, test=is_test_step) for numstep in range(params['nbsteps']): y, hebb = net(Variable(inputs[numstep], requires_grad=False), Variable(labels[numstep], requires_grad=False), hebb) # Compute the loss criterion = torch.nn.BCELoss() loss = criterion(y[0], Variable(target, requires_grad=False)) # Compute the gradients if is_test_step == False: loss.backward() maxg = 0.0 scheduler.step() optimizer.step() lossnum = loss.data[0] lossbetweensaves += lossnum all_losses_objective.append(lossnum) #total_loss += lossnum if is_test_step: # (numiter+1) % params['test_every'] == 0: print(numiter, "====") td = target.cpu().numpy() yd = y.data.cpu().numpy()[0] print("y: ", yd[:10]) print("target: ", td[:10]) #print("target: ", target.unsqueeze(0)[0][:10]) absdiff = np.abs(td-yd) print("Mean / median / max abs diff:", np.mean(absdiff), np.median(absdiff), np.max(absdiff)) print("Correlation (full / sign): ", np.corrcoef(td, yd)[0][1], np.corrcoef(np.sign(td), np.sign(yd))[0][1]) #print inputs[numstep] previoustime = nowtime nowtime = time.time() print("Time spent on last", params['test_every'], "iters: ", nowtime - previoustime) #total_loss /= params['test_every'] #print("Mean loss over last", params['test_every'], "iters:", total_loss) #all_losses.append(total_loss) print("Loss on single withheld-data episode:", lossnum) all_losses.append(lossnum) print ("Eta: ", net.eta.data.cpu().numpy()) sys.stdout.flush() #total_loss = 0 if (numiter+1) % params['save_every'] == 0: print("Saving files...") lossbetweensaves /= params['save_every'] print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # Instability detection; useful for SELUs, which 
seem to be divergence-prone # NOTE: highly experimental! # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us. #if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) #else: # to "print("Saved!")" print("Saving local files...") localsuffix = suffix if (numiter + 1) % 500000 == 0: localsuffix = localsuffix + "_"+str(numiter+1) with open('results_'+localsuffix+'.dat', 'wb') as fo: pickle.dump(net.w.data.cpu().numpy(), fo) pickle.dump(net.alpha.data.cpu().numpy(), fo) pickle.dump(net.eta.data.cpu().numpy(), fo) pickle.dump(all_losses, fo) pickle.dump(params, fo) with open('loss_'+localsuffix+'.txt', 'w') as thefile: for item in all_losses: thefile.write("%s\n" % item) torch.save(net.state_dict(), 'torchmodel_'+localsuffix+'.txt') # # Uber-only if os.path.isdir('/mnt/share/tmiconi'): print("Transferring to NFS storage...") for fn in ['results_'+localsuffix+'.dat', 'loss_'+localsuffix+'.txt', 'torchmodel_'+localsuffix+'.txt']: result = os.system( 'cp {} {}'.format(fn, '/mnt/share/tmiconi/omniglot-nfs/'+fn)) print("Done!") lossbetweensavesprev = lossbetweensaves lossbetweensaves = 0 sys.stdout.flush() sys.stderr.flush() @click.command() @click.option('--nbclasses', default=defaultParams['nbclasses']) @click.option('--alpha', default=defaultParams['alpha']) #@click.option('--plastsize', default=defaultParams['plastsize']) @click.option('--rule', default=defaultParams['rule']) @click.option('--gamma', default=defaultParams['gamma']) @click.option('--steplr', default=defaultParams['steplr']) @click.option('--activ', default=defaultParams['activ']) @click.option('--flare', default=defaultParams['flare']) @click.option('--nbshots', default=defaultParams['nbshots']) @click.option('--nbf', default=defaultParams['nbf']) @click.option('--prestime', default=defaultParams['prestime']) @click.option('--prestimetest', 
default=defaultParams['prestimetest']) @click.option('--ipd', default=defaultParams['ipd']) @click.option('--nbiter', default=defaultParams['nbiter']) @click.option('--lr', default=defaultParams['lr']) @click.option('--test_every', default=defaultParams['test_every']) @click.option('--save_every', default=defaultParams['save_every']) @click.option('--rngseed', default=defaultParams['rngseed']) def main(nbclasses, alpha, rule, gamma, steplr, activ, flare, nbshots, nbf, prestime, prestimetest, ipd, nbiter, lr, test_every, save_every, rngseed): train(paramdict=dict(click.get_current_context().params)) #print(dict(click.get_current_context().params)) if __name__ == "__main__": #train() main() ================================================ FILE: omniglot/opus.docker ================================================ #tmiconi_omniglot #latest #. FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 RUN pip3 install scikit-image RUN pip3 install click RUN mkdir /home/work RUN mkdir /home/work/omniglot-master/ COPY ./*.py /home/work/ ADD ./omniglot-master /home/work/omniglot-master/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: omniglot/plotresults.py ================================================ import numpy as np import glob import matplotlib.pyplot as plt groupnames = glob.glob('./tmp/loss*rngseed_0.txt') #fnames = glob.glob('./tmp/loss_api_*.txt') #fnames = glob.glob('./tmp/loss_fixed_*.txt') def mavg(x, N): cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / N plt.ion() #plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts plt.figure() maxl = 100 for numgroup, groupname in enumerate(groupnames): g = groupname[:-6]+"*" print(g) fnames = glob.glob(g) fulllosses=[] losses=[] lgts=[] for fn in fnames: if "COPY" in fn: continue if "00.tx" in fn: continue z = np.loadtxt(fn) z = z[::10] # Decimation #z = mavg(z, 100) lgts.append(len(z)) fulllosses.append(z) minlen = min(lgts) 
for z in fulllosses: losses.append(z[:minlen]) losses = np.array(losses) meanl = np.mean(losses, axis=0) stdl = np.std(losses, axis=0) medianl = np.median(losses, axis=0) q1l = np.percentile(losses, 25, axis=0) q3l = np.percentile(losses, 75, axis=0) highl = np.max(losses, axis=0) lowl = np.min(losses, axis=0) #highl = meanl+stdl #lowl = meanl-stdl myls = '-' if numgroup >= 8: myls = '--' xx = range(len(meanl)) # xticks and labels if len(meanl) > maxl: maxl = len(meanl) #plt.plot(mavg(meanl, 100), label=g) #, color='blue') #plt.fill_between(xx, lowl, highl, alpha=.1) #plt.fill_between(xx, q1l, q3l, alpha=.3) #plt.plot(meanl) #, color='blue') plt.plot(mavg(medianl, 10), label=g, ls=myls) #, color='blue') # mavg changes the number of points ! #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue') #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue') #plt.fill_between(xx, q1l, q3l, alpha=.2) #plt.plot(medianl, label=g) #, color='blue') plt.legend() #plt.xlabel('Loss (sum square diff. 
b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Loss') xt = range(0, maxl, 100) xtl = [str(5000*i) for i in xt] #5000 = 500 episode per loss saving, plus the decimation above plt.xticks(xt, xtl) plt.tight_layout() ================================================ FILE: omniglot/request.json ================================================ { "dockerImage":"tmiconi_omniglot", "tag":"master-test-2018_6_22_10_40_5", "name":"Exp7_OmniglotNoSepPlast_alpha_free_tanh_oja_lr3e-5_gamma0.666_NFS", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 omniglot.py --nbclasses 5 --nbiter 5000000 --rule oja --activ tanh --steplr 1000000 --prestime 1 --prestimetest 1 --gamma .666 --alpha free --lr 3e-5 --rngseed {{mesos.instance}}", "ramMB":6000, "gpus":1, "diskMB":6000, "resourcePool": "/ailabs/p2/tmiconi", "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: omniglot/test_omniglot_allseeds.py ================================================ # Differentiable plasticity: Omniglot task. # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Using the output files produced by multiple runs of omniglot.py, test the # trained networks and report their performance (using withheld test classes). # NOTE: you need to specify the suffix of the files you want to test (see # definition of suffix below). Also be sure to use the proper directory. import pdb import torch import torch.nn as nn from torch.autograd import Variable import click import numpy as np from numpy import random import torch.nn.functional as F from torch import optim import random import sys import pickle import pdb import time import skimage from skimage import transform import os import platform import matplotlib.pyplot as plt import glob import omniglot from omniglot import Network np.set_printoptions(precision=4) defaultParams = { # Not really used as the parameters will be read from the saved files 'nbclasses': 5, 'nbshots': 1, # Number of 'shots' in the few-shots learning 'prestime': 1, 'nbf' : 64 , 'prestimetest': 1, 'interpresdelay': 0, 'imagesize': 31, # 28*28 'nbiter': 10000000, 'learningrate': 1e-5, 'print_every': 10, 'rngseed':0 } NBTESTCLASSES = 100 #ttype = torch.FloatTensor; ttype = torch.cuda.FloatTensor; # Generate the full list of inputs, labels, and the target label for an episode def generateInputsLabelsAndTarget(params, imagedata, test=False): #print(("Input Boost:", params['inputboost'])) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] inputT = np.zeros((params['nbsteps'], 1, 1, params['imagesize'], params['imagesize'])) 
#inputTensor, initially in numpy format... Note dimensions: number of steps x batchsize (always 1) x NbChannels (also 1) x h x w labelT = np.zeros((params['nbsteps'], 1, params['nbclasses'])) #labelTensor, initially in numpy format... patterns=[] if test: cats = np.random.permutation(np.arange(len(imagedata) - NBTESTCLASSES, len(imagedata)))[:params['nbclasses']] # Which categories to use for this *testing* episode? else: cats = np.random.permutation(np.arange(len(imagedata) - NBTESTCLASSES))[:params['nbclasses']] # Which categories to use for this *training* episode? #print("Test is", test, ", cats are", cats) #cats = np.array(range(params['nbclasses'])) + 10 cats = np.random.permutation(cats) #print(cats) # We show one picture of each category, with labels, then one picture of one of these categories as a test, without label # But each of the categories may undergo rotation by 0, 90, 180 or 270deg # NOTE: We randomly assign one rotation to all the possible categories, not just the ones selected for the episode - it makes the coding simpler rots = np.random.randint(4, size=len(imagedata)) #rots.fill(0) testcat = random.choice(cats) # select the class on which we'll test in this episode unpermcats = cats.copy() # Inserting the character images and labels in the input tensor at the proper places location = 0 for nc in range(params['nbshots']): np.random.shuffle(cats) # Presentations occur in random order for ii, catnum in enumerate(cats): #print(catnum) p = random.choice(imagedata[catnum]) for nr in range(rots[catnum]): p = np.rot90(p) p = skimage.transform.resize(p, (31, 31)) for nn in range(params['prestime']): #numi =nc * (params['nbclasses'] * (params['prestime']+params['interpresdelay'])) + ii * (params['prestime']+params['interpresdelay']) + nn inputT[location][0][0][:][:] = p[:][:] labelT[location][0][np.where(unpermcats == catnum)] = 1 #if nn == 0: # print(labelT[location][0]) location += 1 location += params['interpresdelay'] # Inserting the test character 
p = random.choice(imagedata[testcat]) for nr in range(rots[testcat]): p = np.rot90(p) p = skimage.transform.resize(p, (31, 31)) for nn in range(params['prestimetest']): inputT[location][0][0][:][:] = p[:][:] location += 1 # Generating the test label testlabel = np.zeros(params['nbclasses']) testlabel[np.where(unpermcats == testcat)] = 1 #print(testcat, testlabel) #pdb.set_trace() assert(location == params['nbsteps']) inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to Tensor labelT = torch.from_numpy(labelT).type(ttype) targetL = torch.from_numpy(testlabel).type(ttype) return inputT, labelT, targetL def train(paramdict=None): print("Initializing random seeds") np.random.seed(0); random.seed(0); torch.manual_seed(0) print("Starting testing...") params = {} params.update(defaultParams) if paramdict: params.update(paramdict) #pdb.set_trace() print("Loading Omniglot data...") imagedata = [] imagefilenames=[] for basedir in ('./omniglot-master/python/images_background/', './omniglot-master/python/images_evaluation/'): alphabetdirs = glob.glob(basedir+'*') print(alphabetdirs[:4]) for alphabetdir in alphabetdirs: chardirs = glob.glob(alphabetdir+"/*") for chardir in chardirs: chardata = [] charfiles = glob.glob(chardir+'/*') for fn in charfiles: filedata = plt.imread(fn) chardata.append(filedata) imagedata.append(chardata) imagefilenames.append(fn) # imagedata is now a list of lists of numpy arrays # imagedata[CharactertNumber][FileNumber] -> numpy(105,105) np.random.shuffle(imagedata) # Randomize order of characters print(len(imagedata)) print(imagedata[1][2].shape) print("Data loaded!") successrates = [] totaliter = 0 totalmistakes = 0 for myseed in range(10): #suffix="_Wactiv_tanh_alpha_free_flare_0_gamma_0.75_imgsize_31_ipd_0_lr_3e-05_nbclasses_5_nbf_64_nbiter_5000000_nbshots_1_prestime_1_prestimetest_1_rule_oja_steplr_1000000.0_rngseed_"+str(myseed) 
suffix="_Wactiv_tanh_alpha_free_flare_0_gamma_0.666_imgsize_31_ipd_0_lr_3e-05_nbclasses_5_nbf_64_nbiter_5000000_nbshots_1_prestime_1_prestimetest_1_rule_oja_steplr_1000000.0_rngseed_"+str(myseed)+"_5000000" with open('./tmp/results'+suffix+'.dat', 'rb') as fo: tmpw = torch.nn.Parameter(torch.from_numpy(pickle.load(fo)).type(ttype)) tmpalpha = torch.nn.Parameter(torch.from_numpy(pickle.load(fo)).type(ttype)) tmpeta = torch.nn.Parameter(torch.from_numpy(pickle.load(fo)).type(ttype)) tmplss = pickle.load(fo) paramdictLoadedFromFile = pickle.load(fo) params.update(paramdictLoadedFromFile) print("Initializing network") net = Network(params) #net.cuda() print ("Size of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) print("Passed params: ", params) print(platform.uname()) sys.stdout.flush() params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode net.load_state_dict(torch.load('./tmp/torchmodel'+suffix + '.txt')) params['nbiter'] = 100 # Initialize random seeds ; not sure if really useful here... 
#print("Setting random seed to", params['rngseed']) #np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) total_loss = 0.0 #print("Initializing optimizer") ##optimizer = torch.optim.Adam([net.w, net.alpha, net.eta], lr=params['learningrate']) #optimizer = torch.optim.Adam(net.parameters(), lr=params['learningrate']) all_losses = [] #print_every = 20 nowtime = time.time() print("Starting episodes...") sys.stdout.flush() nbmistakes = 0 for numiter in range(params['nbiter']): hebb = net.initialZeroHebb() #optimizer.zero_grad() is_test_step = 1 inputs, labels, target = generateInputsLabelsAndTarget(params, imagedata, test=is_test_step) for numstep in range(params['nbsteps']): y, hebb = net(Variable(inputs[numstep], requires_grad=False), Variable(labels[numstep], requires_grad=False), hebb) #loss = (y[0] - Variable(target, requires_grad=False)).pow(2).sum() criterion = torch.nn.BCELoss() loss = criterion(y[0], Variable(target, requires_grad=False)) #if is_test_step == False: # loss.backward() # optimizer.step() lossnum = loss.data[0] #total_loss += lossnum if is_test_step: total_loss = lossnum if is_test_step: # (numiter+1) % params['print_every'] == 0: print(numiter, "====") td = target.cpu().numpy() yd = y.data.cpu().numpy()[0] #print("y: ", yd[:10]) #print("target: ", td[:10]) if np.argmax(td) != np.argmax(yd): print("Mistake!") nbmistakes += 1 #print("target: ", target.unsqueeze(0)[0][:10]) absdiff = np.abs(td-yd) #print("Mean / median / max abs diff:", np.mean(absdiff), np.median(absdiff), np.max(absdiff)) #print("Correlation (full / sign): ", np.corrcoef(td, yd)[0][1], np.corrcoef(np.sign(td), np.sign(yd))[0][1]) #print inputs[numstep] previoustime = nowtime nowtime = time.time() #print("Time spent on last", params['print_every'], "iters: ", nowtime - previoustime) #total_loss /= params['print_every'] #print("Mean loss over last", params['print_every'], "iters:", 
total_loss) #print("Loss on single withheld-data episode:", lossnum) all_losses.append(total_loss) #print ("Eta: ", net.eta.data.cpu().numpy()) sys.stdout.flush() sys.stderr.flush() total_loss = 0 all_losses = np.array(all_losses) print("Mean / std all losses :", np.mean(all_losses), np.std(all_losses)) print("1st Quartile / median / 3rd Quartile all losses :", np.percentile(all_losses, 25), np.percentile(all_losses, 50), np.percentile(all_losses, 75)) print("Max of all losses :", np.max(all_losses)) print("Nb of mistakes :", nbmistakes, "over", numiter+1, "trials - (", 100.0 - 100.0 * nbmistakes / (numiter+1), " % correct )") successrates.append(100.0 - 100.0 * nbmistakes / (numiter+1)) totalmistakes += nbmistakes totaliter += params['nbiter'] print ("Mean / stdev success rate across runs: ", np.mean(successrates), np.std(successrates)) totalsuccessrate = 1.0 - totalmistakes / totaliter pointestCI = 1.96 * np.sqrt(totalsuccessrate * (1.0 - totalsuccessrate) / totaliter) print ("Success % across all trials (95% CI point estimate):", 100.0 * totalsuccessrate, "+/-", 100.0 * pointestCI) print (totalmistakes, "mistakes out of ", totaliter, "trials") print ("Median success rate across runs: ", np.median(successrates)) @click.command() @click.option('--nbclasses', default=defaultParams['nbclasses']) @click.option('--nbshots', default=defaultParams['nbshots']) @click.option('--prestime', default=defaultParams['prestime']) @click.option('--prestimetest', default=defaultParams['prestimetest']) @click.option('--interpresdelay', default=defaultParams['interpresdelay']) @click.option('--nbiter', default=defaultParams['nbiter']) @click.option('--learningrate', default=defaultParams['learningrate']) @click.option('--print_every', default=defaultParams['print_every']) @click.option('--rngseed', default=defaultParams['rngseed']) def main(nbclasses, nbshots, prestime, prestimetest, interpresdelay, nbiter, learningrate, print_every, rngseed): 
train(paramdict=dict(click.get_current_context().params)) #print(dict(click.get_current_context().params)) if __name__ == "__main__": #train() main() ================================================ FILE: opus.docker ================================================ #tmiconi_rl #latest #. #FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 FROM opus-deep-learning-py3:master-prod-2019_2_5_4_54_39 #FROM opus-deep-learning:master--2018_9_20_18_2_31 RUN mkdir /home/work COPY ./sr/*.py /home/work/sr/ COPY ./sr/*.md /home/work/sr/ COPY ./maze/*.py /home/work/maze/ COPY ./maze/*.md /home/work/maze/ COPY ./simplemaze/*.py /home/work/simplemaze/ COPY ./simplemaze/*.md /home/work/simplemaze/ COPY ./awd-lstm-lm/*.py /home/work/awd-lstm-lm/ COPY ./awd-lstm-lm/*.sh /home/work/awd-lstm-lm/ COPY ./awd-lstm-lm/*.md /home/work/awd-lstm-lm/ #COPY ./*.py /home/work/ #COPY ./*.sh /home/work/ #COPY ./*.md /home/work/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: request_devbox.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_21_10_41_12", "cpus":2.0, "ramMB":26000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":1, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: request_lstm.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_21_10_41_12", "name":"newcode_PLASTICLSTM_agdiv1149_opus_alphatype_perneuron_modultype_modplasth2mod_modulout_fanout_asgdtime_125_1149n_5run", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/awd-lstm-lm \u0026\u0026 apt-get install unzip \u0026\u0026 sh ./getdata.sh \u0026\u0026 python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model PLASTICLSTM --modultype modplasth2mod --modulout fanout --nhid 1149 --alphatype perneuron --asgdtime 125 --agdiv 1149 --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: request_lstm_simple.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2019_5_21_10_41_12", "name":"newcode_SIMPLEPLASTICLSTM_agdiv1149_opus_alphatype_perneuron_modultype_modplasth2mod_modulout_fanout_asgdtime_125_1149n_5run", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/awd-lstm-lm \u0026\u0026 apt-get install unzip \u0026\u0026 sh ./getdata.sh \u0026\u0026 python3 main.py --batch_size 6 --data data/penn --dropouti 0.4 --dropouth 0.25 --epoch 500 --save PTB.pt --wdrop 0 --model SIMPLEPLASTICLSTM --modultype 
modplasth2mod --modulout fanout --nhid 1149 --alphatype perneuron --asgdtime 125 --agdiv 1149 --seed {{mesos.instance}} ", "ramMB":25000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":5, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"p6000"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: simple/.gitignore ================================================ *.txt *.dat ================================================ FILE: simple/OpusHdfsCopy.py ================================================ # Uber-only code for interacting with hdfs # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import os import os.path def checkHdfs(): return os.path.isfile('/opt/hadoop/latest/bin/hdfs') def transferFileToHdfsPath(sourcepath, targetpath): hdfspath = targetpath targetdir = os.path.dirname(targetpath) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) def transferFileToHdfsDir(sourcepath, targetdir): hdfspath = os.path.join(targetdir, os.path.basename(sourcepath)) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) ================================================ FILE: simple/README.md ================================================ # Pattern memorization and completion This code implements the pattern completion task. Five binary pattern of 1000 elements are shown once each, and then a degraded copy of one of these patterns (with half the elements zeroed out) is presented and must be completed. The `simplest.py` program is the simplest, fully functional implementation of this task with a recurrent plastic network. This program is designed to provide an easily understood example for differentiable plasticity. It requires PyTorch, but does not use a GPU. `simple.py` is a slightly more elaborate version that can make use of a GPU. The `full.py` and `lstm.py` programs have more options and can be used to compare different architectures. 
To produce the results shown in the paper: ``` python3 full.py --patternsize 50 --nbaddneurons 2000 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --lr 3e-5 --type nonplastic python3 full.py --patternsize 50 --nbaddneurons 0 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --lr 3e-4 --type plastic python3 lstm.py --patternsize 50 --nbaddneurons 1949 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --clamp 1 --lr 3e-5 ``` ================================================ FILE: simple/full.py ================================================ # Differentiable plasticity: binary pattern memorization and reconstruction # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # This more flexible implementation includes both plastic and non-plastic RNNs. LSTM code is sufficiently different that it makes more sense to put it in a different file. # Also includes some Uber-specific stuff for file transfer. Commented out by default. # Parameters optimized for non-plastic architectures (esp. LSTM): # --patternsize 50 --nbaddneurons 2000 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --lr 3e-5 # For comparing plastic and non-plastic, we use these for both (though the plastic architecture strongly prefers the default ones) # Plastic networks can learn with lr=3e-4. 
# The default parameters are those for the plastic RNN on the 1000-bit task (same as simple.py) import argparse import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim import random import sys import pickle import pdb import time # Uber-only (comment out if not at Uber): import OpusHdfsCopy from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs # Parsing command-line arguments params = {}; params['rngseed'] = 0 parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) parser.add_argument("--nbiter", type=int, help="number of episodes", default=2000) parser.add_argument("--nbaddneurons", type=int, help="number of additional neurons", default=0) parser.add_argument("--lr", type=float, help="learning rate of Adam optimizer", default=3e-4) parser.add_argument("--patternsize", type=int, help="size of the binary patterns", default=1000) parser.add_argument("--nbpatterns", type=int, help="number of patterns to memorize", default=5) parser.add_argument("--nbprescycles", type=int, help="number of presentation cycles", default=2) parser.add_argument("--prestime", type=int, help="number of time steps for each pattern presentation", default=6) parser.add_argument("--interpresdelay", type=int, help="number of time steps between each pattern presentation (with zero input)", default=4) parser.add_argument("--type", help="network type ('plastic' or 'nonplastic')", default='plastic') args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } params.update(argdict) PATTERNSIZE = params['patternsize'] NBNEUR = PATTERNSIZE + params['nbaddneurons'] + 1 # NbNeur = Pattern Size + additional neurons + 1 "bias", fixed-output neuron (bias neuron not needed for this task, but included for completeness) ETA = .01 # The "learning rate" of plastic connections 
ADAMLEARNINGRATE = params['lr'] PROBADEGRADE = .5 # Proportion of bits to zero out in the target pattern at test time NBPATTERNS = params['nbpatterns'] # The number of patterns to learn in each episode NBPRESCYCLES = params['nbprescycles'] # Number of times each pattern is to be presented PRESTIME = params['prestime'] # Number of time steps for each presentation PRESTIMETEST = PRESTIME # Same thing but for the final test pattern INTERPRESDELAY = params['interpresdelay'] # Duration of zero-input interval between presentations NBSTEPS = NBPRESCYCLES * ((PRESTIME + INTERPRESDELAY) * NBPATTERNS) + PRESTIMETEST # Total number of steps per episode #ttype = torch.FloatTensor; # For CPU ttype = torch.cuda.FloatTensor; # For GPU # Generate the full list of inputs for an episode. The inputs are returned as a PyTorch tensor of shape NbSteps x 1 x NbNeur def generateInputsAndTarget(): inputT = np.zeros((NBSTEPS, 1, NBNEUR)) #inputTensor, initially in numpy format... # Create the random patterns to be memorized in an episode seedp = np.ones(PATTERNSIZE); seedp[:PATTERNSIZE//2] = -1 patterns=[] for nump in range(NBPATTERNS): p = np.random.permutation(seedp) patterns.append(p) # Now 'patterns' contains the NBPATTERNS patterns to be memorized in this episode - in numpy format # Choosing the test pattern, partially zero'ed out, that the network will have to complete testpattern = random.choice(patterns).copy() preservedbits = np.ones(PATTERNSIZE); preservedbits[:int(PROBADEGRADE * PATTERNSIZE)] = 0; np.random.shuffle(preservedbits) degradedtestpattern = testpattern * preservedbits # Inserting the inputs in the input tensor at the proper places for nc in range(NBPRESCYCLES): np.random.shuffle(patterns) for ii in range(NBPATTERNS): for nn in range(PRESTIME): numi =nc * (NBPATTERNS * (PRESTIME+INTERPRESDELAY)) + ii * (PRESTIME+INTERPRESDELAY) + nn inputT[numi][0][:PATTERNSIZE] = patterns[ii][:] # Inserting the degraded pattern for nn in range(PRESTIMETEST): inputT[-PRESTIMETEST + 
nn][0][:PATTERNSIZE] = degradedtestpattern[:] for nn in range(NBSTEPS): inputT[nn][0][-1] = 1.0 # Bias neuron. inputT[nn] *= 20.0 # Strengthen inputs inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to Tensor target = torch.from_numpy(testpattern).type(ttype) return inputT, target class NETWORK(nn.Module): def __init__(self): super(NETWORK, self).__init__() # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions # Each *column* of w targets a single output neuron self.w = Variable(.01 * torch.randn(NBNEUR, NBNEUR).type(ttype), requires_grad=True) # The matrix of fixed (baseline) weights self.alpha = Variable(.01 * torch.randn(NBNEUR, NBNEUR).type(ttype), requires_grad=True) # The matrix of plasticity coefficients self.eta = Variable(.01 * torch.ones(1).type(ttype), requires_grad=True) # The eta coefficient is learned self.zeroDiagAlpha() # No plastic autapses def forward(self, input, yin, hebb): # Run the network for one timestep if params['type'] == 'plastic': yout = F.tanh( yin.mm(self.w + torch.mul(self.alpha, hebb)) + input ) hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(yin.unsqueeze(2), yout.unsqueeze(1))[0] # bmm used to implement outer product with the help of unsqueeze (i.e. 
added empty dimensions) elif params['type'] == 'nonplastic': yout = F.tanh( yin.mm(self.w) + input ) else: raise ValueError("Wrong network type!") return yout, hebb def initialZeroState(self): return Variable(torch.zeros(1, NBNEUR).type(ttype)) def initialZeroHebb(self): return Variable(torch.zeros(NBNEUR, NBNEUR).type(ttype)) def zeroDiagAlpha(self): # Zero out the diagonal of the matrix of alpha coefficients: no plastic autapses self.alpha.data -= torch.diag(torch.diag(self.alpha.data)) np.set_printoptions(precision=3) np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) net = NETWORK() optimizer = torch.optim.Adam([net.w, net.alpha, net.eta], lr=ADAMLEARNINGRATE) total_loss = 0.0; all_losses = [] print_every = 100 save_every = 1000 nowtime = time.time() suffix = "binary_"+"".join([str(x)+"_" if pair[0] is not 'nbsteps' and pair[0] is not 'rngseed' and pair[0] is not 'save_every' and pair[0] is not 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames for numiter in range(params['nbiter']): # Initialize network for each episode y = net.initialZeroState() hebb = net.initialZeroHebb() optimizer.zero_grad() # Generate the inputs and target pattern for this episode inputs, target = generateInputsAndTarget() # Run the episode! for numstep in range(NBSTEPS): y, hebb = net(Variable(inputs[numstep], requires_grad=False), y, hebb) # Compute loss for this episode (last step only) loss = (y[0][:PATTERNSIZE] - Variable(target, requires_grad=False)).pow(2).sum() # Apply backpropagation to adapt basic weights and plasticity coefficients loss.backward() optimizer.step() #net.zeroDiagAlpha() # Removes plastic autapses - turned out to be unneeded # That's it for the actual algorithm. 
# Print statistics, save files #lossnum = loss.data[0] # Saved loss is the actual training loss (MSE) to = target.cpu().numpy(); yo = y.data.cpu().numpy()[0][:PATTERNSIZE]; z = (np.sign(yo) != np.sign(to)); lossnum = np.mean(z) # Saved loss is the error rate total_loss += lossnum if (numiter+1) % print_every == 0: print((numiter, "====")) print(target.cpu().numpy()[-10:]) # Target pattern to be reconstructed print(inputs.cpu().numpy()[numstep][0][-10:]) # Last input contains the degraded pattern fed to the network at test time print(y.data.cpu().numpy()[0][-10:]) # Final output of the network previoustime = nowtime nowtime = time.time() print("Time spent on last", print_every, "iters: ", nowtime - previoustime) total_loss /= print_every all_losses.append(total_loss) print("Mean loss over last", print_every, "iters:", total_loss) print("") if (numiter+1) % save_every == 0: with open('outputs_'+suffix+'.dat', 'wb') as fo: pickle.dump(net.w.data.cpu().numpy(), fo) pickle.dump(net.alpha.data.cpu().numpy(), fo) pickle.dump(y.data.cpu().numpy(), fo) # The final y for this episode pickle.dump(all_losses, fo) with open('loss_'+suffix+'.txt', 'w') as fo: for item in all_losses: fo.write("%s\n" % item) # Uber-only if checkHdfs(): print("Transfering to HDFS...") transferFileToHdfsDir('loss_'+suffix+'.txt', '/ailabs/tmiconi/simple/') #transferFileToHdfsDir('results_simple_'+str(params['rngseed'])+'.dat', '/ailabs/tmiconi/exp/') total_loss = 0 ================================================ FILE: simple/lstm.py ================================================ # Memorization of two 50-bit binary patterns per episode, with LSTMs. Takes a very long time to learn the task, and even then imperfectly. 2050 neurons (fewer neurons = worse performance). # # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import argparse import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim import random import sys import pickle as pickle import pdb import time # Uber-only (comment out if not at Uber) import OpusHdfsCopy from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs # Parsing command-line arguments params = {}; params['rngseed'] = 0 parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) parser.add_argument("--nbiter", type=int, help="number of episodes", default=2000) parser.add_argument("--clamp", type=int, help="whether inputs are clamping (1) or not (0)", default=1) parser.add_argument("--nbaddneurons", type=int, help="number of additional neurons", default=0) parser.add_argument("--lr", type=float, help="learning rate of Adam optimizer", default=3e-4) parser.add_argument("--patternsize", type=int, help="size of the binary patterns", default=1000) parser.add_argument("--nbpatterns", type=int, help="number of patterns to memorize", default=5) parser.add_argument("--nbprescycles", type=int, help="number of presentation cycles", default=2) parser.add_argument("--prestime", type=int, help="number of time steps for each pattern presentation", default=6) parser.add_argument("--interpresdelay", type=int, help="number of time steps between each pattern presentation (with zero input)", default=4) parser.add_argument("--type", help="network type ('plastic' or 'nonplastic')", default='plastic') args = 
parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } params.update(argdict) PATTERNSIZE = params['patternsize'] NBHIDDENNEUR = PATTERNSIZE + params['nbaddneurons'] + 1 # NbNeur = Pattern Size + additional neurons + 1 "bias", fixed-output neuron (bias neuron not needed for this task, but included for completeness) ETA = .01 # The "learning rate" of plastic connections; not used for LSTMs ADAMLEARNINGRATE = params['lr'] PROBADEGRADE = .5 # Proportion of bits to zero out in the target pattern at test time CLAMPING = params['clamp'] NBPATTERNS = params['nbpatterns'] # The number of patterns to learn in each episode NBPRESCYCLES = params['nbprescycles'] # Number of times each pattern is to be presented PRESTIME = params['prestime'] # Number of time steps for each presentation PRESTIMETEST = PRESTIME # Same thing but for the final test pattern INTERPRESDELAY = params['interpresdelay'] # Duration of zero-input interval between presentations NBSTEPS = NBPRESCYCLES * ((PRESTIME + INTERPRESDELAY) * NBPATTERNS) + PRESTIMETEST # Total number of steps per episode RNGSEED = params['rngseed'] #PATTERNSIZE = 50 # ## Note: For LSTM, there are PATTERNSIZE input and output neurons, and NBHIDDENNEUR neurons in the hidden recurrent layer ##NBNEUR = PATTERNSIZE # NbNeur = Pattern Size + 1 "bias", fixed-output neuron (bias neuron not needed for this task, but included for completeness) #NBHIDDENNEUR = 2000 # 1000 takes longer # ##ETA = .01 # The "learning rate" of plastic connections. Not used for LSTMs. #ADAMLEARNINGRATE = 3e-5 # 1e-4 # 3e-5 works better in the long run. 1e-4 OK. 3e-4 fails. 
#RNGSEED = 0 # #PROBADEGRADE = .5 # Proportion of bits to zero out in the target pattern at test time #NBPATTERNS = 2 # The number of patterns to learn in each episode #NBPRESCYCLES = 1 # Number of times each pattern is to be presented #PRESTIME = 3 # Number of time steps for each presentation #PRESTIMETEST = 3 # Same thing but for the final test pattern #INTERPRESDELAY = 1 # Duration of zero-input interval between presentations #NBSTEPS = NBPRESCYCLES * ((PRESTIME + INTERPRESDELAY) * NBPATTERNS) + PRESTIMETEST # Total number of steps per episode #ttype = torch.FloatTensor; ttype = torch.cuda.FloatTensor; # Generate the full list of inputs for an episode. The inputs are returned as a PyTorch tensor of shape NbSteps x 1 x NbNeur def generateInputsAndTarget(): #inputT = np.zeros((NBSTEPS, 1, NBNEUR)) #inputTensor, initially in numpy format... inputT = np.zeros((NBSTEPS, 1, PATTERNSIZE)) #inputTensor, initially in numpy format... # Create the random patterns to be memorized in an episode seedp = np.ones(PATTERNSIZE); seedp[:PATTERNSIZE//2] = -1 patterns=[] for nump in range(NBPATTERNS): p = np.random.permutation(seedp) patterns.append(p) # Now 'patterns' contains the NBPATTERNS patterns to be memorized in this episode - in numpy format # Choosing the test pattern, partially zero'ed out, that the network will have to complete testpattern = random.choice(patterns).copy() #testpattern = patterns[1].copy() preservedbits = np.ones(PATTERNSIZE); preservedbits[:int(PROBADEGRADE * PATTERNSIZE)] = 0; np.random.shuffle(preservedbits) degradedtestpattern = testpattern * preservedbits # Inserting the inputs in the input tensor at the proper places for nc in range(NBPRESCYCLES): np.random.shuffle(patterns) for ii in range(NBPATTERNS): for nn in range(PRESTIME): numi =nc * (NBPATTERNS * (PRESTIME+INTERPRESDELAY)) + ii * (PRESTIME+INTERPRESDELAY) + nn inputT[numi][0][:PATTERNSIZE] = patterns[ii][:] # Inserting the degraded pattern for nn in range(PRESTIMETEST): inputT[-PRESTIMETEST 
+ nn][0][:PATTERNSIZE] = degradedtestpattern[:] for nn in range(NBSTEPS): #inputT[nn][0][-1] = 1.0 # Bias neuron. inputT[nn] *= 100.0 # Strengthen inputs inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to Tensor target = torch.from_numpy(testpattern).type(ttype) return inputT, target class NETWORK(nn.Module): def __init__(self): super(NETWORK, self).__init__() self.lstm = torch.nn.LSTM(PATTERNSIZE, NBHIDDENNEUR).cuda() #input size, hidden size self.hidden = self.initialZeroState() # Note that the "hidden state" is a tuple (hidden state, cells state) def forward(self, inputs,): # Run the network over entire sequence of inputs self.hidden = self.initialZeroState() if CLAMPING: # This code allows us to make the inputs on the LSTM "clamping", # i.e. neurons that receive an input have their output clamped at # this value, to make it similar to the RNN architectures. # # Note that you get worse results if you don't use it ! ("CLAMPING = 0" above) (clamping automatically reduces chance error to ~.25, since all input bits are always correct) # #self.lstm.weight_hh_l0.data.fill_(0) #self.lstm.weight_ih_l0.data.fill_(0) self.lstm.bias_hh_l0.data.fill_(0) #self.lstm.bias_ih_l0.data.fill_(0) for ii in range(PATTERNSIZE): self.lstm.weight_ih_l0.data[2*NBHIDDENNEUR + ii].fill_(0) self.lstm.weight_ih_l0.data[2*NBHIDDENNEUR + ii][ii] = 10.0 # Trick to make inputs clamping on the cells, for fair comparison (need to also set input gates...) self.lstm.bias_ih_l0.data[0*NBHIDDENNEUR+ ii]= 10.0 # bias to input gate self.lstm.bias_ih_l0.data[1*NBHIDDENNEUR+ ii]= -1000.0 # bias to forget gate (actually a persistence gate? 
- sigmoid, so to set it to 0, put a massive negative bias) self.lstm.bias_ih_l0.data[2*NBHIDDENNEUR+ ii]= 0 # bias to cell gate self.lstm.bias_ih_l0.data[3*NBHIDDENNEUR+ ii]= 10.0 # bias to output gate; sigmoid lstm_out, self.hidden = self.lstm(inputs, self.hidden) #o = self.h2o(lstm_out) #.view(NBSTEPS, -1)) #outputz = F.tanh(o) outputz = lstm_out return outputz #yout = F.tanh( yin.mm(self.w + torch.mul(self.alpha, hebb)) + input ) #hebb = (1 - ETA) * hebb + ETA * torch.bmm(yin.unsqueeze(2), yout.unsqueeze(1))[0] # bmm used to implement outer product with the help of unsqueeze (i.e. added empty dimensions) #return yout, hebb def initialZeroState(self): return (Variable(torch.zeros(1, 1, NBHIDDENNEUR).type(ttype)), Variable(torch.zeros(1, 1, NBHIDDENNEUR).type(ttype))) if len(sys.argv) == 2: RNGSEED = int(sys.argv[1]) print("Setting RNGSEED to "+str(RNGSEED)) np.set_printoptions(precision=3) np.random.seed(RNGSEED); random.seed(RNGSEED); torch.manual_seed(RNGSEED) net = NETWORK() optimizer = torch.optim.Adam(net.parameters(), lr=ADAMLEARNINGRATE) total_loss = 0.0; all_losses = [] print_every = 100 save_every = 1000 nowtime = time.time() for numiter in range(params['nbiter']): optimizer.zero_grad() net.hidden = net.initialZeroState() # Generate the inputs and target pattern for this episode inputs, target = generateInputsAndTarget() # Run the episode! y = net(Variable(inputs, requires_grad=False))[-1][0] # Compute loss for this episode (last step only) loss = (y[:PATTERNSIZE] - Variable(target, requires_grad=False)).pow(2).sum() #pdb.set_trace() # Apply backpropagation to adapt basic weights and plasticity coefficients loss.backward() optimizer.step() # That's it for the actual algorithm. 
# Print statistics, save files #lossnum = loss.data[0] yo = y.data.cpu().numpy()[:PATTERNSIZE] to = target.cpu().numpy() z = (np.sign(yo) != np.sign(to)) lossnum = np.mean(z) total_loss += lossnum if (numiter+1) % print_every == 0: print((numiter, "====")) print(target.cpu().numpy()[:10]) # Target pattern to be reconstructed print(inputs.cpu().numpy()[-1][0][:10]) # Last input contains the degraded pattern fed to the network at test time print(y.data.cpu().numpy()[:10]) # Final output of the network previoustime = nowtime nowtime = time.time() print("Time spent on last", print_every, "iters: ", nowtime - previoustime) total_loss /= print_every all_losses.append(total_loss) print("Mean loss over last", print_every, "iters:", total_loss) print("") if (numiter+1) % save_every == 0: fname = 'loss_binary_lstm_nbiter_'+str(params['nbiter'])+'_nbhneur_'+str(NBHIDDENNEUR)+'_clamp_'+str(CLAMPING)+'_lr_'+str(ADAMLEARNINGRATE)+'_prestime_'+str(PRESTIME)+'_ipd_'+str(INTERPRESDELAY)+'_rngseed_'+str(RNGSEED)+'.txt' with open(fname, 'w') as fo: for item in all_losses: fo.write("%s\n" % item) # Uber-only (comment out if not at Uber) if checkHdfs(): print("Transfering to HDFS...") transferFileToHdfsDir(fname, '/ailabs/tmiconi/simple/') total_loss = 0 ================================================ FILE: simple/opus.docker ================================================ #tmiconi_rl #latest #. #FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10 FROM localhost:5000/opus-deep-learning:master-test-2018_1_3_0_38_14 RUN mkdir /home/work COPY ./*.py /home/work/ ENV LC_ALL C.UTF-8 ENV LANG C.UTF-8 ================================================ FILE: simple/plotresults.py ================================================ # Code to plot learning curves # # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import glob import matplotlib.pyplot as plt fnames = glob.glob('./tmp/loss_simple_*.txt') #fnames = glob.glob('./tmp/loss_api_*.txt') #fnames = glob.glob('./tmp/loss_fixed_*.txt') plt.ion() plt.rc('font', size=12) plt.figure(figsize=(5,4)) # Smaller figure = relative larger fonts fulllosses=[] losses=[] lgts=[] for fn in fnames: z = np.loadtxt(fn) lgts.append(len(z)) fulllosses.append(z) minlen = min(lgts) for z in fulllosses: losses.append(z[:minlen]) losses = np.array(losses) meanl = np.mean(losses, axis=0) stdl = np.std(losses, axis=0) highl = np.max(losses, axis=0) lowl = np.min(losses, axis=0) #highl = meanl+stdl #lowl = meanl-stdl xx = range(len(meanl)) # xticks and labels xt = range(0, len(meanl), 50) xtl = [str(10*i) for i in xt] plt.fill_between(xx, lowl, highl, color='blue', alpha=.5) plt.plot(meanl, color='blue') #plt.xlabel('Loss (sum square diff. 
b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Loss') plt.xticks(xt, xtl) plt.tight_layout() ================================================ FILE: simple/request.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_6_5_9_32_56", "name":"Exp_simple_1Miter_0addneur_plastic_lr3e-5", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 full.py --patternsize 50 --nbaddneurons 0 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --lr 3e-5 --type plastic --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p2/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: simple/request_lstm.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_6_6_16_30_17", "name":"ExpD_simple_lstm_1Miter_1949addneur_clamp1_lr3e-5", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 
lstm.py --patternsize 50 --nbaddneurons 1949 --nbprescycles 1 --nbpatterns 2 --prestime 3 --interpresdelay 1 --nbiter 1000000 --clamp 1 --lr 3e-5 --rngseed {{mesos.instance}}", "ramMB":8000, "gpus":1, "diskMB":8000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p2/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: simple/simple.py ================================================ # Differentiable plasticity: simple binary pattern memorization and reconstruction. # # Copyright (c) 2018 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This program is meant as a simple instructional example for differentiable plasticity. It is fully functional but not very flexible. # Usage: python simple.py [rngseed], where rngseed is an optional parameter specifying the seed of the random number generator. # To use it on a GPU or CPU, toggle comments on the 'ttype' declaration below. 
import argparse import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim import random import sys import pickle as pickle import pdb import time PATTERNSIZE = 1000 NBNEUR = PATTERNSIZE+1 # NbNeur = Pattern Size + 1 "bias", fixed-output neuron (bias neuron not needed for this task, but included for completeness) #ETA = .01 # The "learning rate" of plastic connections - we actually learn it ADAMLEARNINGRATE =3e-4 # The learning rate of the Adam optimizer RNGSEED = 0 # Initial random seed - can be modified by passing a number as command-line argument # Note that these patterns are likely not optimal PROBADEGRADE = .5 # Proportion of bits to zero out in the target pattern at test time NBPATTERNS = 5 # The number of patterns to learn in each episode NBPRESCYCLES = 2 # Number of times each pattern is to be presented PRESTIME = 6 # Number of time steps for each presentation PRESTIMETEST = 6 # Same thing but for the final test pattern INTERPRESDELAY = 4 # Duration of zero-input interval between presentations NBSTEPS = NBPRESCYCLES * ((PRESTIME + INTERPRESDELAY) * NBPATTERNS) + PRESTIMETEST # Total number of steps per episode if len(sys.argv) == 2: RNGSEED = int(sys.argv[1]) print("Setting RNGSEED to "+str(RNGSEED)) np.set_printoptions(precision=3) np.random.seed(RNGSEED); random.seed(RNGSEED); torch.manual_seed(RNGSEED) #ttype = torch.FloatTensor; # For CPU ttype = torch.cuda.FloatTensor; # For GPU # Generate the full list of inputs for an episode. The inputs are returned as a PyTorch tensor of shape NbSteps x 1 x NbNeur def generateInputsAndTarget(): inputT = np.zeros((NBSTEPS, 1, NBNEUR)) #inputTensor, initially in numpy format... 
# Create the random patterns to be memorized in an episode seedp = np.ones(PATTERNSIZE); seedp[:PATTERNSIZE//2] = -1 patterns=[] for nump in range(NBPATTERNS): p = np.random.permutation(seedp) patterns.append(p) # Now 'patterns' contains the NBPATTERNS patterns to be memorized in this episode - in numpy format # Choosing the test pattern, partially zero'ed out, that the network will have to complete testpattern = random.choice(patterns).copy() preservedbits = np.ones(PATTERNSIZE); preservedbits[:int(PROBADEGRADE * PATTERNSIZE)] = 0; np.random.shuffle(preservedbits) degradedtestpattern = testpattern * preservedbits # Inserting the inputs in the input tensor at the proper places for nc in range(NBPRESCYCLES): np.random.shuffle(patterns) for ii in range(NBPATTERNS): for nn in range(PRESTIME): numi =nc * (NBPATTERNS * (PRESTIME+INTERPRESDELAY)) + ii * (PRESTIME+INTERPRESDELAY) + nn inputT[numi][0][:PATTERNSIZE] = patterns[ii][:] # Inserting the degraded pattern for nn in range(PRESTIMETEST): inputT[-PRESTIMETEST + nn][0][:PATTERNSIZE] = degradedtestpattern[:] for nn in range(NBSTEPS): inputT[nn][0][-1] = 1.0 # Bias neuron. 
inputT[nn] *= 20.0 # Strengthen inputs inputT = torch.from_numpy(inputT).type(ttype) # Convert from numpy to Tensor target = torch.from_numpy(testpattern).type(ttype) return inputT, target class NETWORK(nn.Module): def __init__(self): super(NETWORK, self).__init__() # Notice that the vectors are row vectors, and the matrices are transposed wrt the usual order, following apparent pytorch conventions # Each *column* of w targets a single output neuron self.w = Variable(.01 * torch.randn(NBNEUR, NBNEUR).type(ttype), requires_grad=True) # The matrix of fixed (baseline) weights self.alpha = Variable(.01 * torch.randn(NBNEUR, NBNEUR).type(ttype), requires_grad=True) # The matrix of plasticity coefficients self.eta = Variable(.01 * torch.ones(1).type(ttype), requires_grad=True) # The weight decay term / "learning rate" of plasticity - trainable, but shared across all connections def forward(self, input, yin, hebb): # Run the network for one timestep yout = F.tanh( yin.mm(self.w + torch.mul(self.alpha, hebb)) + input ) hebb = (1 - self.eta) * hebb + self.eta * torch.bmm(yin.unsqueeze(2), yout.unsqueeze(1))[0] # bmm here is used to implement an outer product between yin and yout, with the help of unsqueeze (i.e. added empty dimensions) return yout, hebb def initialZeroState(self): # Return an initialized, all-zero hidden state return Variable(torch.zeros(1, NBNEUR).type(ttype)) def initialZeroHebb(self): # Return an initialized, all-zero Hebbian trace return Variable(torch.zeros(NBNEUR, NBNEUR).type(ttype)) net = NETWORK() optimizer = torch.optim.Adam([net.w, net.alpha, net.eta], lr=ADAMLEARNINGRATE) total_loss = 0.0; all_losses = [] print_every = 10 nowtime = time.time() for numiter in range(2000): # Initialize network for each episode y = net.initialZeroState() hebb = net.initialZeroHebb() optimizer.zero_grad() # Generate the inputs and target pattern for this episode inputs, target = generateInputsAndTarget() # Run the episode! 
for numstep in range(NBSTEPS): y, hebb = net(Variable(inputs[numstep], requires_grad=False), y, hebb) # Compute loss for this episode (last step only) loss = (y[0][:PATTERNSIZE] - Variable(target, requires_grad=False)).pow(2).sum() # Apply backpropagation to adapt basic weights and plasticity coefficients loss.backward() optimizer.step() # That's it for the actual algorithm! # Print statistics, save files #lossnum = loss.data[0] # Saved loss is the actual learning loss (MSE) to = target.cpu().numpy(); yo = y.data.cpu().numpy()[0][:PATTERNSIZE]; z = (np.sign(yo) != np.sign(to)); lossnum = np.mean(z) # Saved loss is the error rate total_loss += lossnum if (numiter+1) % print_every == 0: print((numiter, "====")) print(target.cpu().numpy()[-10:]) # Target pattern to be reconstructed print(inputs.cpu().numpy()[numstep][0][-10:]) # Last input contains the degraded pattern fed to the network at test time print(y.data.cpu().numpy()[0][-10:]) # Final output of the network previoustime = nowtime nowtime = time.time() print("Time spent on last", print_every, "iters: ", nowtime - previoustime) total_loss /= print_every all_losses.append(total_loss) print("Mean loss over last", print_every, "iters:", total_loss) print("") with open('output_simple_'+str(RNGSEED)+'.dat', 'wb') as fo: pickle.dump(net.w.data.cpu().numpy(), fo) pickle.dump(net.alpha.data.cpu().numpy(), fo) pickle.dump(y.data.cpu().numpy(), fo) # The final y for this episode pickle.dump(all_losses, fo) with open('loss_simple_'+str(RNGSEED)+'.txt', 'w') as fo: for item in all_losses: fo.write("%s\n" % item) total_loss = 0 ================================================ FILE: simple/simplest.py ================================================ # Differentiable plasticity: simplest fully functional code. # Copyright (c) 2018 Uber Technologies, Inc. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This is a very simple, but fully functional implementation of Differentiable # Plasticity. It implements the binary pattern completion task discussed in # Section 4.1 of Miconi et al. ICML 2018 (https://arxiv.org/abs/1804.02464). # The code implements a simple RNN with plastic weights. It requires PyTorch, # but does not use a GPU. # The actual code that specifically implements plasticity # amounts to less than 4 lines of code in total (see Section # S1 in the paper cited above). import argparse import torch from torch.autograd import Variable import numpy as np import torch.nn.functional as F import random import time PATTERNSIZE = 1000 # Size of the patterns to memorize NBNEUR = PATTERNSIZE # One neuron per pattern element NBPATTERNS = 5 # The number of patterns to learn in each episode NBPRESCYCLES = 2 # Number of times each pattern is to be presented PRESTIME = 6 # Number of time steps for each presentation PRESTIMETEST = 6 # Same thing but for the final test pattern INTERPRESDELAY = 4 # Duration of zero-input interval between presentations NBSTEPS = NBPRESCYCLES * ((PRESTIME + INTERPRESDELAY) * NBPATTERNS) + PRESTIMETEST # Total number of steps per episode # Generate the full list of inputs, as well as the target output at last time step, for an episode. 
def generateInputsAndTarget():
    """Build one episode: return the (NBSTEPS, 1, NBNEUR) input tensor and the
    full (un-degraded) target pattern the network must reconstruct."""
    stim = np.zeros((NBSTEPS, 1, NBNEUR))  # Episode inputs, assembled in numpy first

    # Draw the NBPATTERNS random binary (+1/-1) patterns to be memorized
    patterns = [2 * np.random.randint(2, size=PATTERNSIZE) - 1 for _ in range(NBPATTERNS)]

    # Pick one pattern as the test target, then zero out a random subset of its
    # bits to obtain the degraded cue shown at test time
    testpattern = random.choice(patterns).copy()
    degradedtestpattern = testpattern * np.random.randint(2, size=PATTERNSIZE)

    # Presentation schedule: each cycle shows every pattern (in freshly shuffled
    # order) for PRESTIME steps, followed by INTERPRESDELAY blank steps
    slotlen = PRESTIME + INTERPRESDELAY
    for cycle in range(NBPRESCYCLES):
        np.random.shuffle(patterns)
        for patidx in range(NBPATTERNS):
            base = cycle * (NBPATTERNS * slotlen) + patidx * slotlen
            for step in range(PRESTIME):
                stim[base + step][0][:] = patterns[patidx][:]

    # The degraded test pattern occupies the last PRESTIMETEST steps
    for step in range(PRESTIMETEST):
        stim[-PRESTIMETEST + step][0][:] = degradedtestpattern[:]

    episode = 20.0 * torch.from_numpy(stim.astype(np.float32))  # Scale up inputs; convert to Tensor
    wanted = torch.from_numpy(testpattern.astype(np.float32))
    return episode, wanted


total_loss = 0.0
all_losses = []
nowtime = time.time()

# === Actual algorithm ===
# Note that each column of w and alpha defines the inputs to a single neuron
w = Variable(.01 * torch.randn(NBNEUR, NBNEUR), requires_grad=True)      # Fixed weights
alpha = Variable(.01 * torch.randn(NBNEUR, NBNEUR), requires_grad=True)  # Plasticity coeffs.
optimizer = torch.optim.Adam([w, alpha], lr=3e-4)

print("Starting episodes...")
for numiter in range(1000):  # Loop over episodes
    y = Variable(torch.zeros(1, NBNEUR))           # Initialize neuron activations
    hebb = Variable(torch.zeros(NBNEUR, NBNEUR))   # Initialize Hebbian trace
    inputs, target = generateInputsAndTarget()     # Generate inputs & target for this episode
    optimizer.zero_grad()

    # Run the episode: each step applies the plastic RNN update and accumulates
    # the Hebbian trace with fixed decay .99 and learning rate .01.
    for numstep in range(NBSTEPS):
        yout = F.tanh( y.mm(w + torch.mul(alpha, hebb)) + Variable(inputs[numstep], requires_grad=False) )
        hebb = .99 * hebb + .01 * torch.ger(y[0], yout[0])  # torch.ger = Outer product
        y = yout

    # Episode done, now compute loss, apply backpropagation
    loss = (y[0] - Variable(target, requires_grad=False)).pow(2).sum()
    loss.backward()
    optimizer.step()
    # === End of actual algorithm ===

    # Print statistics
    print_every = 10  # NOTE(review): loop-invariant; could be hoisted above the loop
    to = target.cpu().numpy(); yo = y.data.cpu().numpy()[0][:]
    z = (np.sign(yo) != np.sign(to)); lossnum = np.mean(z)  # Compute error rate
    total_loss += lossnum
    if (numiter+1) % print_every == 0:
        previoustime = nowtime; nowtime = time.time()
        print("Episode", numiter, "=== Time spent on last", print_every, "iters: ", nowtime - previoustime)
        print(target.cpu().numpy()[-10:])              # Target pattern to be reconstructed
        print(inputs.cpu().numpy()[numstep][0][-10:])  # Last input (degraded pattern)
        print(y.data.cpu().numpy()[0][-10:])           # Final output of the network
        total_loss /= print_every
        print("Mean error rate over last", print_every, "iters:", total_loss, "\n")
        total_loss = 0



================================================
FILE: simplemaze/README.md
================================================
# Simple code for the grid maze task.

This code is a deliberately simplified version of the `maze` experiment. The code is made as simple as possible, with copious comments.

##Usage

To run the program, just run `python3 maze.py`.

Default parameters should be able to meta-learn the task.
## Backpropamine network

The `Network` class in `maze/maze.py` implements a Backpropamine recurrent network, that is, a fully-connected recurrent neural network with neuromodulated Hebbian plastic connections that is trained by gradient descent. Here is the full code for the `Network` class, which contains the entire machinery for Backpropamine (note that it only contains ~25 lines of code):

```python
class Network(nn.Module):

    def __init__(self, isize, hsize):
        super(Network, self).__init__()
        self.hsize, self.isize = hsize, isize

        self.i2h = torch.nn.Linear(isize, hsize)    # Weights from input to recurrent layer
        self.w = torch.nn.Parameter(.001 * torch.rand(hsize, hsize))   # Baseline ("fixed") component of the plastic recurrent layer

        self.alpha = torch.nn.Parameter(.001 * torch.rand(hsize, hsize))   # Plasticity coefficients of the plastic recurrent layer; one alpha coefficient per recurrent connection

        self.h2mod = torch.nn.Linear(hsize, 1)      # Weights from the recurrent layer to the (single) neuromodulator output
        self.modfanout = torch.nn.Linear(1, hsize)  # The modulator output is passed through a different 'weight' for each neuron (it 'fans out' over neurons)

        self.h2o = torch.nn.Linear(hsize, NBACTIONS)    # From recurrent to outputs (action probabilities)
        self.h2v = torch.nn.Linear(hsize, 1)            # From recurrent to value-prediction (used for A2C)

    def forward(self, inputs, hidden):  # hidden is a tuple containing the h-state and the hebbian trace
        HS = self.hsize

        # hidden[0] is the h-state; hidden[1] is the Hebbian trace
        hebb = hidden[1]

        # Each *column* of w, alpha and hebb contains the input weights to a single neuron
        hactiv = torch.tanh( self.i2h(inputs) + hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze(1) )
        activout = self.h2o(hactiv)  # Pure linear, raw scores - to be softmaxed later, outside the function
        valueout = self.h2v(hactiv)

        # Now computing the Hebbian updates...
        deltahebb = torch.bmm(hidden[0].unsqueeze(2), hactiv.unsqueeze(1))  # Batched outer product of previous hidden state with new hidden state

        # We also need to compute the eta (the plasticity rate), which is determined by neuromodulation
        myeta = F.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1

        # The neuromodulated eta is passed through a vector of fanout weights, one per neuron.
        # Each *column* in w, hebb and alpha constitutes the inputs to a single cell.
        # For w and alpha, columns are the 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
        # The output of the following line has shape BatchSize x 1 x NHidden, i.e. 1 line and NHidden columns for each
        # batch element. When multiplying by hebb (BatchSize x NHidden x NHidden), broadcasting will provide a different
        # value for each cell but the same value for all inputs of a cell, as required by the fanout concept.
        myeta = self.modfanout(myeta)

        # Updating Hebbian traces, with a hard clip (other choices are possible)
        self.clipval = 2.0
        hebb = torch.clamp(hebb + myeta * deltahebb, min=-self.clipval, max=self.clipval)

        hidden = (hactiv, hebb)
        return activout, valueout, hidden

    def initialZeroHebb(self, BATCHSIZE):
        return Variable(torch.zeros(BATCHSIZE, self.hsize, self.hsize), requires_grad=False)

    def initialZeroState(self, BATCHSIZE):
        return Variable(torch.zeros(BATCHSIZE, self.hsize), requires_grad=False)
```

The rest of the code implements a simple A2C algorithm to train the network for the Grid Maze task.

## Copyright and licensing information

Copyright (c) 2018-2019 Uber Technologies, Inc.

Licensed under the Uber Non-Commercial License (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at the root directory of this project.

See the License file in this repository for the specific language governing
permissions and limitations under the License.
================================================ FILE: simplemaze/maze.py ================================================ # Backpropamine: differentiable neuromdulated plasticity. # # Copyright (c) 2018-2019 Uber Technologies, Inc. # # Licensed under the Uber Non-Commercial License (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at the root directory of this project. # # See the License file in this repository for the specific language governing # permissions and limitations under the License. # This code implements the "Grid Maze" task. See section 4.2 in # Miconi et al. ICLR 2019 ( https://openreview.net/pdf?id=r1lrAiA5Ym ) # or section 4.5 in Miconi et al. # ICML 2018 ( https://arxiv.org/abs/1804.02464 ) # The Network class implements a "backpropamine" network, that is, a neural # network with neuromodulated Hebbian plastic connections that is trained by # gradient descent. The Backpropamine machinery is # entirely contained in the Network class (~25 lines of code). # The rest of the code implements a simple # A2C algorithm to train the network for the Grid Maze task. 
import argparse
import pdb
#from line_profiler import LineProfiler
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random
import sys
import pickle
import time
import os
import platform

import numpy as np
np.set_printoptions(precision=4)

ADDITIONALINPUTS = 4  # 1 input for the previous reward, 1 input for numstep, 1 unused, 1 "Bias" input
NBACTIONS = 4  # U, D, L, R
RFSIZE = 3  # Receptive Field: RFSIZE x RFSIZE
TOTALNBINPUTS = RFSIZE * RFSIZE + ADDITIONALINPUTS + NBACTIONS


# RNN with trainable modulated plasticity ("backpropamine")
class Network(nn.Module):
    """Recurrent network with neuromodulated Hebbian plastic connections.

    The recurrent layer's effective weights are w + alpha * hebb, where hebb is
    a per-batch-element Hebbian trace updated each step with a learned,
    state-dependent plasticity rate (neuromodulation).
    """

    def __init__(self, isize, hsize):
        super(Network, self).__init__()
        self.hsize, self.isize = hsize, isize

        self.i2h = torch.nn.Linear(isize, hsize)  # Weights from input to recurrent layer
        self.w = torch.nn.Parameter(.001 * torch.rand(hsize, hsize))  # Baseline (non-plastic) component of the plastic recurrent layer
        self.alpha = torch.nn.Parameter(.001 * torch.rand(hsize, hsize))  # Plasticity coefficients of the plastic recurrent layer; one alpha coefficient per recurrent connection
        #self.alpha = torch.nn.Parameter(.0001 * torch.rand(1,1,hsize))  # Per-neuron alpha
        #self.alpha = torch.nn.Parameter(.0001 * torch.ones(1))          # Single alpha for whole network
        self.h2mod = torch.nn.Linear(hsize, 1)  # Weights from the recurrent layer to the (single) neuromodulator output
        self.modfanout = torch.nn.Linear(1, hsize)  # The modulator output is passed through a different 'weight' for each neuron (it 'fans out' over neurons)
        self.h2o = torch.nn.Linear(hsize, NBACTIONS)  # From recurrent to outputs (action probabilities)
        self.h2v = torch.nn.Linear(hsize, 1)  # From recurrent to value-prediction (used for A2C)

    def forward(self, inputs, hidden):  # hidden is a tuple containing the h-state (i.e. the recurrent hidden state) and the hebbian trace
        HS = self.hsize

        # hidden[0] is the h-state; hidden[1] is the Hebbian trace
        hebb = hidden[1]

        # Each *column* of w, alpha and hebb contains the input weights to a single neuron
        hactiv = torch.tanh( self.i2h(inputs) + hidden[0].unsqueeze(1).bmm(self.w + torch.mul(self.alpha, hebb)).squeeze(1) )  # Update the h-state
        activout = self.h2o(hactiv)  # Pure linear, raw scores - to be softmaxed later, outside the function
        valueout = self.h2v(hactiv)

        # Now computing the Hebbian updates...
        deltahebb = torch.bmm(hidden[0].unsqueeze(2), hactiv.unsqueeze(1))  # Batched outer product of previous hidden state with new hidden state

        # We also need to compute the eta (the plasticity rate), which is determined by neuromodulation
        # Note that this is "simple" neuromodulation.
        # Fixed: use torch.tanh (as for hactiv above) instead of the deprecated F.tanh.
        myeta = torch.tanh(self.h2mod(hactiv)).unsqueeze(2)  # Shape: BatchSize x 1 x 1

        # The neuromodulated eta is passed through a vector of fanout weights, one per neuron.
        # Each *column* in w, hebb and alpha constitutes the inputs to a single cell.
        # For w and alpha, columns are the 2nd dimension (i.e. dim 1); for hebb, it's dimension 2 (dimension 0 is batch)
        # The output of the following line has shape BatchSize x 1 x NHidden, i.e. 1 line and NHidden columns for each
        # batch element. When multiplying by hebb (BatchSize x NHidden x NHidden), broadcasting will provide a different
        # value for each cell but the same value for all inputs of a cell, as required by the fanout concept.
        myeta = self.modfanout(myeta)

        # Updating Hebbian traces, with a hard clip (other choices are possible)
        self.clipval = 2.0
        hebb = torch.clamp(hebb + myeta * deltahebb, min=-self.clipval, max=self.clipval)

        hidden = (hactiv, hebb)
        return activout, valueout, hidden

    def initialZeroState(self, BATCHSIZE):
        # Return an all-zero recurrent hidden state for a batch
        return Variable(torch.zeros(BATCHSIZE, self.hsize), requires_grad=False )

    # In plastic networks, we must also initialize the Hebbian state:
    def initialZeroHebb(self, BATCHSIZE):
        # Return an all-zero Hebbian trace for a batch
        return Variable(torch.zeros(BATCHSIZE, self.hsize, self.hsize) , requires_grad=False)


# That's it for plasticity! The rest of the code simply implements the maze task and the A2C RL algorithm.

def train(paramdict):
    """Meta-train a backpropamine network on the Grid Maze task with A2C.

    paramdict: hyperparameters collected from the command line (see __main__).
    """
    #params = dict(click.get_current_context().params)
    #TOTALNBINPUTS = RFSIZE * RFSIZE + ADDITIONALINPUTS + NBNONRESTACTIONS
    print("Starting training...")
    params = {}
    #params.update(defaultParams)
    params.update(paramdict)
    print("Passed params: ", params)
    print(platform.uname())
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest']  # Total number of steps per episode
    # Turning the parameters into a nice suffix for filenames.
    # Fixed: compare strings with != rather than 'is not' (identity comparison on
    # string literals only worked by CPython interning accident and raises a
    # SyntaxWarning on Python 3.8+).
    suffix = "btchFixmod_"+"".join([str(x)+"_" if pair[0] != 'nbsteps' and pair[0] != 'rngseed' and pair[0] != 'save_every' and pair[0] != 'test_every' and pair[0] != 'pe' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed'])

    # Initialize random seeds (first two redundant?)
print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") net = Network(TOTALNBINPUTS, params['hs']).to(device) # Creating the network print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4, weight_decay=params['l2']) #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr']) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr']) BATCHSIZE = params['bs'] LABSIZE = params['msize'] lab = np.ones((LABSIZE, LABSIZE)) CTR = LABSIZE // 2 # Grid maze lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0) for row in range(1, LABSIZE - 1): for col in range(1, LABSIZE - 1): if row % 2 == 0 and col % 2 == 0: lab[row, col] = 1 # Not strictly necessary, but cleaner since we start the agent at the # center for each episode; may help loclization in some maze sizes # (including 13 and 9, but not 11) by introducing a detectable irregularity # in the center: lab[CTR,CTR] = 0 all_losses = [] all_grad_norms = [] all_losses_objective = [] all_total_rewards = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() meanrewards = np.zeros((LABSIZE, LABSIZE)) meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen'])) pos = 0 hidden = net.initialZeroState(BATCHSIZE) hebb = net.initialZeroHebb(BATCHSIZE) #pw = net.initialZeroPlasticWeights() # For eligibility traces #celoss = torch.nn.CrossEntropyLoss() # For supervised learning - not used here 
print("Starting episodes!") for numiter in range(params['nbiter']): PRINTTRACE = 0 #if (numiter+1) % (1 + params['pe']) == 0: if (numiter+1) % (params['pe']) == 0: PRINTTRACE = 1 #lab = makemaze.genmaze(size=LABSIZE, nblines=4) #count = np.zeros((LABSIZE, LABSIZE)) # Select the reward location for this episode - not on a wall! # And not on the center either! (though not sure how useful that restriction is...) # We always start the episode from the center posr = {}; posc = {} rposr = {}; rposc = {} for nb in range(BATCHSIZE): # Note: it doesn't matter if the reward is on the center (see below). All we need is not to put it on a wall or pillar (lab=1) myrposr = 0; myrposc = 0 while lab[myrposr, myrposc] == 1 or (myrposr == CTR and myrposc == CTR): myrposr = np.random.randint(1, LABSIZE - 1) myrposc = np.random.randint(1, LABSIZE - 1) rposr[nb] = myrposr; rposc[nb] = myrposc #print("Reward pos:", rposr, rposc) # Agent always starts an episode from the center posc[nb] = CTR posr[nb] = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState(BATCHSIZE).to(device) hebb = net.initialZeroHebb(BATCHSIZE).to(device) numactionchosen = 0 reward = np.zeros(BATCHSIZE) sumreward = np.zeros(BATCHSIZE) rewards = [] vs = [] logprobs = [] dist = 0 numactionschosen = np.zeros(BATCHSIZE, dtype='int32') #reloctime = np.random.randint(params['eplen'] // 4, (3 * params['eplen']) // 4) #print("EPISODE ", numiter) for numstep in range(params['eplen']): inputs = np.zeros((BATCHSIZE, TOTALNBINPUTS), dtype='float32') labg = lab.copy() for nb in range(BATCHSIZE): inputs[nb, 0:RFSIZE * RFSIZE] = labg[posr[nb] - RFSIZE//2:posr[nb] + RFSIZE//2 +1, posc[nb] - RFSIZE //2:posc[nb] + RFSIZE//2 +1].flatten() * 1.0 # Previous chosen action inputs[nb, RFSIZE * RFSIZE +1] = 1.0 # Bias neuron inputs[nb, RFSIZE * RFSIZE +2] = numstep / params['eplen'] inputs[nb, RFSIZE * RFSIZE +3] = 1.0 * reward[nb] inputs[nb, RFSIZE * RFSIZE + ADDITIONALINPUTS + numactionschosen[nb]] = 1 inputsC = 
torch.from_numpy(inputs).to(device) ## Running the network y, v, (hidden, hebb) = net(inputsC, (hidden, hebb)) # y should output raw scores, not probas y = torch.softmax(y, dim=1) distrib = torch.distributions.Categorical(y) actionschosen = distrib.sample() logprobs.append(distrib.log_prob(actionschosen)) numactionschosen = actionschosen.data.cpu().numpy() # We want to break gradients reward = np.zeros(BATCHSIZE, dtype='float32') for nb in range(BATCHSIZE): myreward = 0 numactionchosen = numactionschosen[nb] tgtposc = posc[nb] tgtposr = posr[nb] if numactionchosen == 0: # Up tgtposr -= 1 elif numactionchosen == 1: # Down tgtposr += 1 elif numactionchosen == 2: # Left tgtposc -= 1 elif numactionchosen == 3: # Right tgtposc += 1 else: raise ValueError("Wrong Action") reward[nb] = 0.0 # The reward for this step if lab[tgtposr][tgtposc] == 1: reward[nb] -= params['wp'] else: posc[nb] = tgtposc posr[nb] = tgtposr # Did we hit the reward location ? Increase reward and teleport! # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move... # But we still avoid it. if rposr[nb] == posr[nb] and rposc[nb] == posc[nb]: reward[nb] += params['rew'] posr[nb]= np.random.randint(1, LABSIZE - 1) posc[nb] = np.random.randint(1, LABSIZE - 1) while lab[posr[nb], posc[nb]] == 1 or (rposr[nb] == posr[nb] and rposc[nb] == posc[nb]): posr[nb] = np.random.randint(1, LABSIZE - 1) posc[nb] = np.random.randint(1, LABSIZE - 1) rewards.append(reward) vs.append(v) sumreward += reward # This is an "entropy penalty", implemented by the sum-of-squares of the probabilities because our version of PyTorch did not have an entropy() function. # The result is the same: to penalize concentration, i.e. encourage diversity in chosen actions. 
loss += ( params['bent'] * y.pow(2).sum() / BATCHSIZE ) #if PRINTTRACE: # print("Step ", numstep, " Inputs (to 1st in batch): ", inputs[0, :TOTALNBINPUTS], " - Outputs(1st in batch): ", y[0].data.cpu().numpy(), " - action chosen(1st in batch): ", numactionschosen[0], # #" - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), # " -Reward (this step, 1st in batch): ", reward[0]) # Episode is done, now let's do the actual computations of rewards and losses for the A2C algorithm R = torch.zeros(BATCHSIZE).to(device) gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : R = gammaR * R + torch.from_numpy(rewards[numstepb]).to(device) ctrR = R - vs[numstepb][0] lossv += ctrR.pow(2).sum() / BATCHSIZE loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BATCHSIZE #pdb.set_trace() loss += params['blossv'] * lossv loss /= params['eplen'] if PRINTTRACE: if True: print("lossv: ", float(lossv)) print ("Total reward for this episode (all):", sumreward, "Dist:", dist) loss.backward() all_grad_norms.append(torch.nn.utils.clip_grad_norm(net.parameters(), params['gc'])) if numiter > 100: # Burn-in period for meanrewards optimizer.step() lossnum = float(loss) lossbetweensaves += lossnum all_losses_objective.append(lossnum) all_total_rewards.append(sumreward.mean()) if (numiter+1) % params['pe'] == 0: print(numiter, "====") print("Mean loss: ", lossbetweensaves / params['pe']) lossbetweensaves = 0 print("Mean reward (across batch and last", params['pe'], "eps.): ", np.sum(all_total_rewards[-params['pe']:])/ params['pe']) #print("Mean reward (across batch): ", sumreward.mean()) previoustime = nowtime nowtime = time.time() print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime) #print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy()) if (numiter+1) % params['save_every'] == 0: print("Saving files...") losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) 
print("Saving local files...") with open('grad_'+suffix+'.txt', 'w') as thefile: for item in all_grad_norms[::10]: thefile.write("%s\n" % item) with open('loss_'+suffix+'.txt', 'w') as thefile: for item in all_total_rewards[::10]: thefile.write("%s\n" % item) torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat') with open('params_'+suffix+'.dat', 'wb') as fo: pickle.dump(params, fo) if os.path.isdir('/mnt/share/tmiconi'): print("Transferring to NFS storage...") for fn in ['params_'+suffix+'.dat', 'loss_'+suffix+'.txt', 'torchmodel_'+suffix+'.dat']: result = os.system( 'cp {} {}'.format(fn, '/mnt/share/tmiconi/modulmaze/'+fn)) print("Done!") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=10.0) parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.0) parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03) parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1) parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=11) parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9) parser.add_argument("--gc", type=float, help="gradient norm clipping", default=4.0) parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4) parser.add_argument("--eplen", type=int, help="length of episodes", default=200) parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100) parser.add_argument("--bs", type=int, help="batch size", default=30) parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=0) # 3e-6 
parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=50) parser.add_argument("--pe", type=int, help="number of cycles between successive printing of information", default=10) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } train(argdict) ================================================ FILE: sr/.gitignore ================================================ tmp/ tmp/* *.txt *.dat *.swp ================================================ FILE: sr/OpusHdfsCopy.py ================================================ import os import os.path def checkHdfs(): return os.path.isfile('/opt/hadoop/latest/bin/hdfs') def transferFileToHdfsPath(sourcepath, targetpath): hdfspath = targetpath targetdir = os.path.dirname(targetpath) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) def transferFileToHdfsDir(sourcepath, targetdir): hdfspath = os.path.join(targetdir, os.path.basename(sourcepath)) os.system('/opt/hadoop/latest/bin/hdfs dfs -mkdir -p {}'.format(targetdir)) result = os.system( '/opt/hadoop/latest/bin/hdfs dfs -copyFromLocal -f {} {}'.format(sourcepath, hdfspath) ) if result != 0: raise OSError('Cannot copyFromLocal {} {} returned {}'.format(sourcepath, hdfspath, result)) ================================================ FILE: sr/README.md ================================================ # Target discovery task A simple stimulus-response ("SR") association task. At the start of each episode, we generate four random "cues" (i.e. random binary vectors of length 20). One of them is randomly chosen as the "target". 
Then, we repeatedly show pairs of cues (randomly chosen among the four) in succession, and ask the network to specify whether one of these two is the target. If the network's answer is correct, a reward is issued; otherwise nothing happens.

The network's task is to obtain as much reward as possible during each episode. Note that the network must identify the target (from reward information alone), then detect it and respond adequately afterwards. Furthermore, because cues are shown in pairs, the target can never be fully identified in a single "trial": the network is forced to integrate information across successive "trials".

The outer-loop meta-learning algorithm is Advantage Actor-Critic. All within-episode learning occurs through the self-modulated plasticity of network connections.

Usage:

`python3 srbatch.py --eplen 120 --hs 200 --lr 1e-4 --l2 0 --pe 500 --bv 0.1 --bent 0.1 --rew 1 --wp 0 --save_every 2000 --type modul --da tanh --clamp 0 --nbiter 200000 --fm 1 --ni 4 --pf .0 --alg A3C --cs 20 --eps 1e-6 --is 0 --bs 30 --gc 2.0 --rngseed 0`

`eplen` is the length of an episode, `hs` is the hidden/recurrent layer size, `bs` is batch size and `gc` is gradient clipping. `type` can be "modplast" (simple neuromodulation), "modul" (retroactive modulation), "plastic" (non-modulated plasticity) or "rnn" (no plasticity at all, plain rnn).

Note that `srbatch.py` implements batch training: the first dimension in the data, the hidden state and the Hebbian traces is a batch dimension.
================================================ FILE: sr/anim.py ================================================
# Replays a few episodes of a trained maze agent and saves the trajectory as an animated GIF.
import argparse
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random
import sys
import pickle
import time
import os
import OpusHdfsCopy
from OpusHdfsCopy import transferFileToHdfsDir, checkHdfs
import platform
import modul
from modul import Network
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import glob

np.set_printoptions(precision=4)

ETA = .02          # Not used
ADDINPUT = 4       # 1 input for the previous reward, 1 input for numstep, 1 for whether currently on reward square, 1 "Bias" input
NBACTIONS = 4      # U, D, L, R
RFSIZE = 3         # Receptive Field
TOTALNBINPUTS = RFSIZE * RFSIZE + ADDINPUT + NBACTIONS

# Module-level figure: frames are accumulated on it and turned into an ArtistAnimation below.
fig = plt.figure()
plt.axis('off')

def train(paramdict):
    # Despite the name, this function does NOT train: it loads a saved model and replays
    # params['nbiter'] (forced to 3 below) episodes, recording one image per step.
    # paramdict must contain 'file': path to a pickled dict of the training-time parameters,
    # which is also used to rebuild the filename suffix of the saved model weights.
    fname = paramdict['file']
    with open(fname, 'rb') as f:
        params = pickle.load(f)
    #params = dict(click.get_current_context().params)
    print("Passed params: ", params)
    print(platform.uname())
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode
    suffix = "modulmaze_"+"".join([str(x)+"_" if pair[0] != 'nbsteps' and pair[0] != 'rngseed' and pair[0] != 'save_every' and pair[0] != 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames
    #params['rngseed'] = 3
    # Initialize random seeds (first two redundant?)
    print("Setting random seeds")
    np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])
    #print(click.get_current_context().params)
    net = Network(params)
    # YOU MAY NEED TO CHANGE THE DIRECTORY HERE:
    net.load_state_dict(torch.load('./tmp/torchmodel_'+suffix + '.dat'))
    print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()])
    allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()]
    print ("Size (numel) of all optimized elements:", allsizes)
    print ("Total size (numel) of all optimized elements:", sum(allsizes))
    LABSIZE = params['msize']
    lab = np.ones((LABSIZE, LABSIZE))
    CTR = LABSIZE // 2
    # Grid maze: outer border stays walls (1), interior cleared, then pillars at even/even cells.
    lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0)
    for row in range(1, LABSIZE - 1):
        for col in range(1, LABSIZE - 1):
            if row % 2 == 0 and col % 2 == 0:
                lab[row, col] = 1
    lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps localization by introducing a detectable irregularity in the center
    all_losses = []
    all_losses_objective = []
    all_total_rewards = []
    all_losses_v = []
    lossbetweensaves = 0
    nowtime = time.time()
    meanrewards = np.zeros((LABSIZE, LABSIZE))
    meanrewardstmp = np.zeros((LABSIZE, LABSIZE, params['eplen']))
    pos = 0
    params['nbiter'] = 3  # Only replay a handful of episodes for the animation
    ax_imgs = []          # One [image] entry per timestep, consumed by ArtistAnimation at the end
    for numiter in range(params['nbiter']):
        PRINTTRACE = 0
        #if (numiter+1) % (1 + params['print_every']) == 0:
        if (numiter+1) % (params['print_every']) == 0:
            PRINTTRACE = 1
        #lab = makemaze.genmaze(size=LABSIZE, nblines=4)
        #count = np.zeros((LABSIZE, LABSIZE))
        # Select the reward location for this episode - not on a wall!
        rposr = 0; rposc = 0
        while lab[rposr, rposc] == 1:
            rposr = np.random.randint(1, LABSIZE - 1)
            rposc = np.random.randint(1, LABSIZE - 1)
        # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp'])
        posc = CTR
        posr = CTR
        #optimizer.zero_grad()
        loss = 0
        lossv = 0
        hidden = net.initialZeroState()
        hebb = net.initialZeroHebb()
        et = net.initialZeroHebb()
        pw = net.initialZeroPlasticWeights()
        numactionchosen = 0
        reward = 0.0
        rewards = []
        vs = []
        logprobs = []
        sumreward = 0.0
        dist = 0
        #print("EPISODE ", numiter)
        for numstep in range(params['eplen']):
            if params['clamp'] == 0:
                inputs = np.zeros((1, TOTALNBINPUTS), dtype='float32')
            else:
                inputs = np.zeros((1, params['hs']), dtype='float32')
            labg = lab.copy()
            #labg[rposr, rposc] = -1 # The agent can see the reward if it falls within its RF
            # RFSIZE x RFSIZE receptive field centered on the agent, flattened into the first inputs
            inputs[0, 0:RFSIZE * RFSIZE] = labg[posr - RFSIZE//2:posr + RFSIZE//2 +1, posc - RFSIZE //2:posc + RFSIZE//2 +1].flatten() * 1.0
            # Previous chosen action
            inputs[0, RFSIZE * RFSIZE +1] = 1.0 # Bias neuron
            inputs[0, RFSIZE * RFSIZE +2] = numstep / params['eplen']
            inputs[0, RFSIZE * RFSIZE +3] = 1.0 * reward # Reward from previous time step
            inputs[0, RFSIZE * RFSIZE + ADDINPUT + numactionchosen] = 1
            inputsC = torch.from_numpy(inputs).cuda()
            ## Running the network
            # NOTE(review): Variable(...) and .data[0] below are PyTorch 0.3-era idioms; this
            # script presumably targets that legacy version - confirm before running on modern torch.
            y, v, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y  should output raw scores, not probas
            y = F.softmax(y, dim=1) # Must convert y to probas to use this !
            distrib = torch.distributions.Categorical(y)
            actionchosen = distrib.sample() # sample() returns a Pytorch tensor of size 1; this is needed for the backprop below
            numactionchosen = actionchosen.data[0] # Turn to scalar
            tgtposc = posc
            tgtposr = posr
            if numactionchosen == 0:    # Up
                tgtposr -= 1
            elif numactionchosen == 1:  # Down
                tgtposr += 1
            elif numactionchosen == 2:  # Left
                tgtposc -= 1
            elif numactionchosen == 3:  # Right
                tgtposc += 1
            else:
                raise ValueError("Wrong Action")
            reward = 0.0
            if lab[tgtposr][tgtposc] == 1:  # Hit wall!
                reward = -params['wp']
            else:
                dist += 1
                posc = tgtposc
                posr = tgtposr
            # Display the labyrinth
            #for numr in range(LABSIZE):
            #    s = ""
            #    for numc in range(LABSIZE):
            #        if posr == numr and posc == numc:
            #            s += "o"
            #        elif rposr == numr and rposc == numc:
            #            s += "X"
            #        elif lab[numr, numc] == 1:
            #            s += "#"
            #        else:
            #            s += " "
            #    print(s)
            #print("")
            #print("")
            # Record one animation frame: 2 marks the reward cell, 3 marks the agent.
            labg = lab.copy()
            labg[rposr, rposc] = 2
            labg[posr, posc] = 3
            fullimg = plt.imshow(labg, animated=True)
            ax_imgs.append([fullimg])
            # Did we hit the reward location ? Increase reward and teleport!
            # Note that it doesn't matter if we teleport onto the reward, since reward hitting is only evaluated after the (obligatory) move
            if rposr == posr and rposc == posc:
                reward += params['rew']
                if params['rsp'] == 1:
                    posr = np.random.randint(1, LABSIZE - 1)
                    posc = np.random.randint(1, LABSIZE - 1)
                    while lab[posr, posc] == 1:
                        posr = np.random.randint(1, LABSIZE - 1)
                        posc = np.random.randint(1, LABSIZE - 1)
                else:
                    posr = CTR
                    posc = CTR
            #if PRINTTRACE:
            #    #print("Step ", numstep, "- GI: ", goodinput, ", GA: ", goodaction, " Inputs: ", inputsN, " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen,
            #    #    " - inputthisstep:", inputthisstep, " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Rew: ", reward)
            #    print("Step ", numstep, " Inputs: ", inputs[0,:TOTALNBINPUTS], " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen,
            #        " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), " -Reward (this step): ", reward)
            rewards.append(reward)
            vs.append(v)
            sumreward += reward
            logprobs.append(distrib.log_prob(actionchosen))
            #if params['algo'] == 'A3C':
            loss += params['bentropy'] * y.pow(2).sum() # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution, so we use this instead.
            ##if PRINTTRACE:
            ##    print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward)

        # Episode is done, now let's do the actual computations (loss is computed but never
        # backpropagated here - this script only replays episodes for the animation).
        gammaR = params['gr']
        if True: #params['algo'] == 'A3C':
            R = 0
            # Discounted-return sweep, backwards through the episode (standard A3C-style target).
            for numstepb in reversed(range(params['eplen'])) :
                R = gammaR * R + rewards[numstepb]
                lossv += (vs[numstepb][0] - R).pow(2)
                loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows
            loss += params['blossv'] * lossv
        #elif params['algo'] == 'REI':
        #    R = sumreward
        #    baseline = meanrewards[rposr, rposc]
        #    for numstepb in reversed(range(params['eplen'])) :
        #        loss -= logprobs[numstepb] * (R - baseline)
        #elif params['algo'] == 'REINOB':
        #    R = sumreward
        #    for numstepb in reversed(range(params['eplen'])) :
        #        loss -= logprobs[numstepb] * R
        #elif params['algo'] == 'REITMP':
        #    R = 0
        #    for numstepb in reversed(range(params['eplen'])) :
        #        R = gammaR * R + rewards[numstepb]
        #        loss -= logprobs[numstepb] * R
        #elif params['algo'] == 'REITMPB':
        #    R = 0
        #    for numstepb in reversed(range(params['eplen'])) :
        #        R = gammaR * R + rewards[numstepb]
        #        loss -= logprobs[numstepb] * (R - meanrewardstmp[rposr, rposc, numstepb])
        #else:
        #    raise ValueError("Which algo?")
        # Running averages of per-location returns (nu is the averaging rate).
        meanrewards[rposr, rposc] = (1.0 - params['nu']) * meanrewards[rposr, rposc] + params['nu'] * sumreward
        R = 0
        for numstepb in reversed(range(params['eplen'])) :
            R = gammaR * R + rewards[numstepb]
            meanrewardstmp[rposr, rposc, numstepb] = (1.0 - params['nu']) * meanrewardstmp[rposr, rposc, numstepb] + params['nu'] * R
        loss /= params['eplen']
        if True: #PRINTTRACE:
            if True: #params['algo'] == 'A3C':
                print("lossv: ", lossv.data.cpu().numpy()[0])
            print ("Total reward for this episode:", sumreward, "Dist:", dist)
        #if numiter > 100:  # Burn-in period for meanrewards
        #    loss.backward()
        #    optimizer.step()
        #torch.cuda.empty_cache()
        #print(sumreward)
        lossnum = loss.data[0]
        lossbetweensaves += lossnum
        all_losses_objective.append(lossnum)
        all_total_rewards.append(sumreward)
        #all_losses_v.append(lossv.data[0])
        #total_loss += lossnum
        if True: #PRINTTRACE:
            print("lossv: ", lossv.data.cpu().numpy()[0])
            print ("Total reward for this episode:", sumreward, "Dist:", dist)

    # All episodes replayed: assemble the recorded frames into a GIF (requires imagemagick).
    print("Saving animation....")
    anim = animation.ArtistAnimation(fig, ax_imgs, interval=200)
    anim.save('anim.gif', writer='imagemagick', fps=10)

if __name__ == "__main__":
    #defaultParams = {
    #    'type' : 'lstm',
    #    'seqlen' :
#    200,   # (continuation of the commented-out defaultParams dict above)
#    'hiddensize': 500,
#    'activ': 'tanh',
#    'steplr': 10e9,   # By default, no change in the learning rate
#    'gamma': .5,      # The annealing factor of learning rate decay for Adam
#    'imagesize': 31,
#    'nbiter': 30000,
#    'lr': 1e-4,
#    'test_every': 10,
#    'save_every': 3000,
#    'rngseed':0
#}
    # The only command-line argument is the pickled params file saved at training time.
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="params file")
    args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None }
    train(argdict)
================================================ FILE: sr/makefigure.py ================================================
# Builds the paper figure: median reward curves (with interquartile bands) for each
# network type, aggregated over runs found in ./tmp/.
import numpy as np
import glob
import matplotlib.pyplot as plt
import scipy
from scipy import stats

colorz = ['g', 'orange', 'r', 'b', 'c', 'm', 'y', 'k']
# One representative filename per experiment group; the 'type_*' wildcard picks up all network types.
groupnames = glob.glob('./tmp/loss_SRB_addpw_2_alg_A3C_bent_0.1_blossv_0.1_bs_30_bv_0.1_clamp_0_cs_20_da_tanh_eplen_120_eps_1e-06_fm_1_gc_2.0_gr_0.9_hs_200_is_0_l2_0.0_lr_0.0001_nbiter_200000_ni_4_nu_0.1_pe_500_pf_0.0_rew_1.0_rule_hebb_type_*_wp_0.0_rngseed_0.txt')
#Previous:
#groupnames = glob.glob('./tmp8/loss_*eplen_251*densize_200*absize_11_*ndstart_1*rngseed_1.txt')
#groupnames = glob.glob('./tmp8/loss_*eplen_251*densize_200*absize_11_*ndstart_1*rngseed_1.txt')
#groupnames = glob.glob('./tmp/loss_*new*eplen_251*rngseed_0.txt')
#groupnames = glob.glob('./tmp/loss_*new*eplen_250*rngseed_0.txt')

plt.rc('font', size=14)

def my_mavg(x, N):
    # Moving average of x over a window of N points; output has len(x) - N + 1 entries.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

plt.ion()
#plt.figure(figsize=(5,4))  # Smaller figure = relative larger fonts
plt.figure()
allmedianls = []
alllosses = []
poscol = 0
minminlen = 999999  # Shortest decimated run length seen across ALL groups (used for the rank-sum tests below)
for numgroup, groupname in enumerate(groupnames):
    if "lstm" in groupname:
        continue
    # Strip the trailing '_0.txt' and re-glob to collect all random seeds of this group.
    g = groupname[:-6]+"*"
    print("====", groupname)
    fnames = glob.glob(g)
    fulllosses=[]
    losses=[]
    lgts=[]
    for fn in fnames:
        if True:
            # Seeds 11-15 deliberately excluded from the figure.
            if "seed_11" in fn:
                continue
            if "seed_12" in fn:
                continue
            if "seed_13" in fn:
                continue
            if "seed_14" in fn:
                continue
            if "seed_15" in fn:
                continue
        z = np.loadtxt(fn)
        z = z[::10] # Decimation - speed things up!
        #z = my_mavg(z, 20) # For each run, we average the losses over K successive (decimated) episodes - otherwise figure is unreadable due to noise!
        z = z[:1801]
        #if len(z) < 9000:
        #    print(fn)
        #    continue
        #z = z[:90]
        lgts.append(len(z))
        fulllosses.append(z)
    # Truncate every run in this group to the shortest run so they stack into one array.
    minlen = min(lgts)
    if minlen < minminlen:
        minminlen = minlen
    print(minlen)
    #if minlen < 1000:
    #    continue
    for z in fulllosses:
        losses.append(z[:minlen])
    losses = np.array(losses)
    alllosses.append(losses)
    meanl = np.mean(losses, axis=0)
    stdl = np.std(losses, axis=0)
    #cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality
    cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f. (?)
    medianl = np.median(losses, axis=0)
    allmedianls.append(medianl)
    q1l = np.percentile(losses, 25, axis=0)
    q3l = np.percentile(losses, 75, axis=0)
    highl = np.max(losses, axis=0)
    lowl = np.min(losses, axis=0)
    #highl = meanl+stdl
    #lowl = meanl-stdl
    xx = range(len(meanl))
    # xticks and labels
    #xt = range(0, len(meanl), 2000)
    xt = range(0, 1801, 500)
    xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files
    # Map the 'type_...' token in the filename to the legend label used in the paper.
    if "plastic" in groupname:
        lbl = "Non-modulated plastic"
    elif "modplast" in groupname:
        lbl = "Simple modulation"
    elif "modul" in groupname:
        lbl = "Retroactive modulation"
    elif "rnn" in groupname:
        lbl = "Non-plastic"
    else:
        raise ValueError("Which type?")
    #plt.plot(my_mavg(meanl, 100), label=g) #, color='blue')
    #plt.fill_between(xx, lowl, highl, alpha=.2)
    #plt.fill_between(xx, q1l, q3l, alpha=.1)
    #plt.plot(meanl) #, color='blue')
    ####plt.plot(my_mavg(medianl, 100), label=g) #, color='blue') # my_mavg changes the number of points !
    #plt.plot(my_mavg(q1l, 100), label=g, alpha=.3) #, color='blue')
    #plt.plot(my_mavg(q3l, 100), label=g, alpha=.3) #, color='blue')
    #plt.fill_between(xx, q1l, q3l, alpha=.2)
    #plt.plot(medianl, label=g) #, color='blue')
    AVGSIZE = 20
    # Smoothed median curve with an interquartile band.
    xlen = len(my_mavg(q1l, AVGSIZE))
    plt.fill_between( range(xlen), my_mavg(q1l, AVGSIZE), my_mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    plt.plot(my_mavg(medianl, AVGSIZE), color=colorz[poscol % len(colorz)], label=lbl) # my_mavg changes the number of points !
    #xlen = len(my_mavg(meanl, AVGSIZE))
    #plt.plot(my_mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # my_mavg changes the number of points !
    #plt.fill_between( range(xlen), my_mavg(meanl - cil, AVGSIZE), my_mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    poscol += 1
    #plt.fill_between( range(xlen), my_mavg(lowl, 100), my_mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)])
    #plt.plot(my_mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)])
    #for curve in losses[1:]:
    #    plt.plot(my_mavg(curve, 1000), color=colorz[numgroup % len(colorz)])

# Per-timestep Wilcoxon rank-sum tests between the FIRST TWO groups only.
ps = []
# Adapt for varying lengths across groups
#for n in range(0, alllosses[0].shape[1], 3):
for n in range(0, minminlen):
    ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue)
ps = np.array(ps)
print(np.mean(ps[-500:] < .05), np.mean(ps[-500:] < .01))

plt.legend(loc='best', fontsize=14)
#plt.xlabel('Loss (sum square diff.
#plt.xlabel('Loss (sum square diff. b/w final output and target)')   # (reconstructed commented-out label)
plt.xlabel('Number of Episodes')
plt.ylabel('Reward')
plt.xticks(xt, xtl)
#plt.tight_layout()
================================================ FILE: sr/modul.py ================================================
# Network definitions for the cue/reward ("SR") task: a non-plastic RNN baseline, a
# plastic (Hebbian) RNN, a simply-neuromodulated RNN, and a retroactively-modulated RNN
# with eligibility traces. All modules move their parameters to CUDA at construction.
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import torch.nn.functional as F

##ttype = torch.FloatTensor;
#ttype = torch.cuda.FloatTensor;
#ttype = torch.FloatTensor;
#ttype = torch.cuda.FloatTensor;

class NonPlasticRNN(nn.Module):
    """Plain recurrent network baseline: fixed recurrent weights w, no plasticity.

    Outputs raw action scores (softmaxed by the caller) and a value estimate.
    """
    def __init__(self, params):
        super(NonPlasticRNN, self).__init__()
        # NOTE: 'outputsize' excludes the value and neuromodulator outputs!
        for paramname in ['outputsize', 'inputsize', 'hs', 'bs', 'fm']:
            if paramname not in params.keys():
                raise KeyError("Must provide missing key in argument 'params': "+paramname)
        NBDA = 1  # For now we limit the number of neuromodulatory-output neurons to 1
        # Doesn't work with our version of PyTorch:
        #self.device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu")
        self.params = params
        self.activ = F.tanh  # NOTE(review): F.tanh/F.sigmoid are deprecated on modern PyTorch (use torch.tanh) - kept for the legacy version this repo targets
        self.i2h = torch.nn.Linear(self.params['inputsize'], params['hs']).cuda()
        self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.h2o = torch.nn.Linear(params['hs'], self.params['outputsize']).cuda()
        self.h2v = torch.nn.Linear(params['hs'], 1).cuda()

    def forward(self, inputs, hidden): #, hebb):
        # One recurrent step. Returns (action scores, value, new hidden state).
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        # Here, the *rows* of w and hebb are the inputs weights to a single neuron
        # hidden = x, hactiv = y
        hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul(self.w, hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        #hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)),
        #    hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed by the calling program
        valueout = self.h2v(hactiv)
        hidden = hactiv
        return activout, valueout, hidden #, hebb

    def initialZeroState(self):
        # Zero hidden state, shape (bs, hs).
        BATCHSIZE = self.params['bs']
        return Variable(torch.zeros(BATCHSIZE, self.params['hs']), requires_grad=False ).cuda()

class PlasticRNN(nn.Module):
    """Plastic RNN: effective recurrent weights are w + alpha * hebb.

    hebb is a per-batch (bs, hs, hs) Hebbian trace, updated each step with a single
    learnable rate eta and hard-clamped to [-1, 1]. No neuromodulation.
    """
    def __init__(self, params):
        super(PlasticRNN, self).__init__()
        # NOTE: 'outputsize' excludes the value and neuromodulator outputs!
        for paramname in ['outputsize', 'inputsize', 'hs', 'bs', 'fm']:
            if paramname not in params.keys():
                raise KeyError("Must provide missing key in argument 'params': "+paramname)
        NBDA = 1  # For now we limit the number of neuromodulatory-output neurons to 1
        # Doesn't work with our version of PyTorch:
        #self.device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu")
        self.params = params
        self.activ = F.tanh
        self.i2h = torch.nn.Linear(self.params['inputsize'], params['hs']).cuda()
        self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.alpha = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.eta = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta
        #self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
        self.h2o = torch.nn.Linear(params['hs'], self.params['outputsize']).cuda()
        self.h2v = torch.nn.Linear(params['hs'], 1).cuda()

    def forward(self, inputs, hidden, hebb):
        # One recurrent step with Hebbian plasticity. Returns (scores, value, hidden, hebb).
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        # Here, the *rows* of w and hebb are the inputs weights to a single neuron
        # hidden = x, hactiv = y
        hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed by the calling program
        valueout = self.h2v(hactiv)
        # Now computing the Hebbian updates...
        # deltahebb has shape BS x HS x HS
        # Each row of hebb contain the input weights to a neuron
        deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round?
        hebb = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0)
        hidden = hactiv
        return activout, valueout, hidden, hebb

    def initialZeroHebb(self):
        # Zero Hebbian trace, shape (bs, hs, hs).
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroState(self):
        BATCHSIZE = self.params['bs']
        return Variable(torch.zeros(BATCHSIZE, self.params['hs']), requires_grad=False ).cuda()

class SimpleModulRNN(nn.Module):
    """Simple neuromodulation: the Hebbian update is multiplied, at each step, by a
    network-computed scalar DAout (one neuromodulatory output per batch element).

    With params['fm'] == 0, only the second half of the neurons is modulated; the first
    half uses the plain eta-scaled Hebbian update.
    """
    def __init__(self, params):
        super(SimpleModulRNN, self).__init__()
        # NOTE: 'outputsize' excludes the value and neuromodulator outputs!
        for paramname in ['outputsize', 'inputsize', 'hs', 'bs', 'fm']:
            if paramname not in params.keys():
                raise KeyError("Must provide missing key in argument 'params': "+paramname)
        NBDA = 1  # For now we limit the number of neuromodulatory-output neurons to 1
        # Doesn't work with our version of PyTorch:
        #self.device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu")
        self.params = params
        self.activ = F.tanh
        self.i2h = torch.nn.Linear(self.params['inputsize'], params['hs']).cuda()
        self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.alpha = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.eta = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta (only for the non-modulated part, if any!)
        self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
        self.h2o = torch.nn.Linear(params['hs'], self.params['outputsize']).cuda()
        self.h2v = torch.nn.Linear(params['hs'], 1).cuda()

    def forward_test(self, inputs, hidden, hebb):
        # Variant step that ignores plasticity entirely (uses w only, hebb passed through).
        # Returns 0 in place of DAout to keep the same tuple arity as forward().
        NBDA = 1
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul(self.w, hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed by the calling program
        valueout = self.h2v(hactiv)
        return activout, valueout, 0, hidden, hebb

    def forward(self, inputs, hidden, hebb):
        # One recurrent step. Returns (scores, value, DAout, hidden, hebb).
        NBDA = 1
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        # Here, the *rows* of w and hebb are the inputs weights to a single neuron
        # hidden = x, hactiv = y
        hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, hebb)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed by the calling program
        valueout = self.h2v(hactiv)
        # Now computing the Hebbian updates...
        # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below)
        if self.params['da'] == 'tanh':
            DAout = F.tanh(self.h2DA(hactiv))
        elif self.params['da'] == 'sig':
            DAout = F.sigmoid(self.h2DA(hactiv))
        elif self.params['da'] == 'lin':
            DAout = self.h2DA(hactiv)
        else:
            raise ValueError("Which transformation for DAout ?")
        # deltahebb has shape BS x HS x HS
        # Each row of hebb contain the input weights to a neuron
        deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round?
        # Hard clamp: modulated update (hebb1) and, when fm==0, a non-modulated one (hebb2).
        hebb1 = torch.clamp(hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=-1.0, max=1.0)
        if self.params['fm'] == 0:
            # Non-modulated part
            hebb2 = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0)
        # Soft Clamp (note that it's different from just putting a tanh on top of a freely varying value):
        #hebb1 = torch.clamp( hebb + torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, min=0.0) * (1 - hebb) +
        #    torch.clamp(DAout.view(BATCHSIZE, 1, 1) * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0)
        #hebb2 = torch.clamp( hebb + torch.clamp(self.eta * deltahebb, min=0.0) * (1 - hebb) + torch.clamp(self.eta * deltahebb, max=0.0) * (hebb + 1) , min=-1.0, max=1.0)
        # Purely additive, no clamping. This will almost certainly diverge, don't use it!
        #hebb1 = hebb + DAout.view(BATCHSIZE, 1, 1) * deltahebb
        #hebb2 = hebb + self.eta * deltahebb
        if self.params['fm'] == 1:
            hebb = hebb1
        elif self.params['fm'] == 0:
            # Combine the modulated and non-modulated part
            hebb = torch.cat( (hebb1[:, :self.params['hs']//2, :], hebb2[:, self.params['hs'] // 2:, :]), dim=1) # Maybe along dim=2 instead?...
        else:
            raise ValueError("Must select whether fully modulated or not (params['fm'])")
        hidden = hactiv
        return activout, valueout, DAout, hidden, hebb

    def initialZeroHebb(self):
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroState(self):
        BATCHSIZE = self.params['bs']
        return Variable(torch.zeros(BATCHSIZE, self.params['hs']), requires_grad=False ).cuda()

class RetroModulRNN(nn.Module):
    """Retroactive neuromodulation: Hebbian products accumulate in a decaying
    eligibility trace et; the neuromodulator DAout converts the trace into actual
    plastic-weight changes pw (so past activity can be reinforced retroactively).
    """
    def __init__(self, params):
        super(RetroModulRNN, self).__init__()
        # NOTE: 'outputsize' excludes the value and neuromodulator outputs!
        for paramname in ['outputsize', 'inputsize', 'hs', 'bs', 'fm']:
            if paramname not in params.keys():
                raise KeyError("Must provide missing key in argument 'params': "+paramname)
        NBDA = 1  # For now we limit the number of neuromodulatory-output neurons to 1
        # Doesn't work with our version of PyTorch:
        #self.device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu")
        self.params = params
        self.activ = F.tanh
        self.i2h = torch.nn.Linear(self.params['inputsize'], params['hs']).cuda()
        self.w = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.alpha = torch.nn.Parameter((.01 * torch.t(torch.rand(params['hs'], params['hs']))).cuda(), requires_grad=True)
        self.eta = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same eta (only for the non-modulated part, if any!)
        self.etaet = torch.nn.Parameter((.1 * torch.ones(1)).cuda(), requires_grad=True) # Everyone has the same etaet
        self.h2DA = torch.nn.Linear(params['hs'], NBDA).cuda()
        self.h2o = torch.nn.Linear(params['hs'], self.params['outputsize']).cuda()
        self.h2v = torch.nn.Linear(params['hs'], 1).cuda()

    def forward(self, inputs, hidden, hebb, et, pw):
        # One recurrent step. Effective recurrent weights are w + alpha * pw.
        # Returns (scores, value, DAout, hidden, hebb, et, pw).
        NBDA = 1
        BATCHSIZE = self.params['bs']
        HS = self.params['hs']
        hactiv = self.activ(self.i2h(inputs).view(BATCHSIZE, HS, 1) + torch.matmul((self.w + torch.mul(self.alpha, pw)), hidden.view(BATCHSIZE, HS, 1))).view(BATCHSIZE, HS)
        activout = self.h2o(hactiv) # Pure linear, raw scores - will be softmaxed later
        valueout = self.h2v(hactiv)
        # Now computing the Hebbian updates...
        # With batching, DAout is a matrix of size BS x 1 (Really BS x NBDA, but we assume NBDA=1 for now in the deltahebb multiplication below)
        if self.params['da'] == 'tanh':
            DAout = F.tanh(self.h2DA(hactiv))
        elif self.params['da'] == 'sig':
            DAout = F.sigmoid(self.h2DA(hactiv))
        elif self.params['da'] == 'lin':
            DAout = self.h2DA(hactiv)
        else:
            raise ValueError("Which transformation for DAout ?")
        if self.params['rule'] == 'hebb':
            deltahebb = torch.bmm(hactiv.view(BATCHSIZE, HS, 1), hidden.view(BATCHSIZE, 1, HS)) # batched outer product...should it be other way round?
        elif self.params['rule'] == 'oja':
            deltahebb = torch.mul(hactiv.view(BATCHSIZE, HS, 1), (hidden.view(BATCHSIZE, 1, HS) - torch.mul(self.w.view(1, HS, HS), hactiv.view(BATCHSIZE, HS, 1))))
        else:
            raise ValueError("Must specify learning rule ('hebb' or 'oja')")
        # Hard clamp: the modulator gates the eligibility trace into the plastic weights.
        deltapw = DAout.view(BATCHSIZE,1,1) * et
        pw1 = torch.clamp(pw + deltapw, min=-1.0, max=1.0)
        # Should we have a fully neuromodulated network, or only half?
        if self.params['fm'] == 1:
            pw = pw1
        elif self.params['fm']==0:
            hebb = torch.clamp(hebb + self.eta * deltahebb, min=-1.0, max=1.0)
            pw = torch.cat( (hebb[:, :self.params['hs']//2, :], pw1[:, self.params['hs'] // 2:, :]), dim=1) # Maybe along dim=2 instead?...
        else:
            raise ValueError("Must select whether fully modulated or not")
        # Updating the eligibility trace - always a simple decay term.
        # Note that self.etaet != self.eta (which is used for hebb, i.e. the non-modulated part)
        deltaet = deltahebb
        et = (1 - self.etaet) * et + self.etaet * deltaet
        hidden = hactiv
        return activout, valueout, DAout, hidden, hebb, et, pw

    def initialZeroHebb(self):
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroPlasticWeights(self):
        # Plastic weights start at zero, shape (bs, hs, hs).
        return Variable(torch.zeros(self.params['bs'], self.params['hs'], self.params['hs']) , requires_grad=False).cuda()

    def initialZeroState(self):
        return Variable(torch.zeros(self.params['bs'], self.params['hs']), requires_grad=False ).cuda()
================================================ FILE: sr/opus.docker.old ================================================
#tmiconi_rl
#latest
#.
#FROM localhost:5000/opus-deep-learning:master-test-2017_9_7_20_56_10
#FROM opus-deep-learning:master-test-2018_1_3_0_38_14
FROM opus-deep-learning:master-prod-2018_9_20_18_2_31
RUN mkdir /home/work
COPY ./*.py /home/work/
ENV LC_ALL C.UTF-8
ENV LANG C.UTF-8
================================================ FILE: sr/plotmodulator.py ================================================
# Plots, for each recorded episode, the cue sequence (raster) with the modulator and
# reward traces overlaid. Input arrays are produced by the training/test scripts.
import numpy as np; import matplotlib.pyplot as plt
c = np.load('cueshown0.dat.npy'); r = np.load('rewardsprevstep0.dat.npy') ; m = np.load('modulator0.dat.npy')
params = {'legend.fontsize': 'x-large', 'axes.labelsize': 'x-large', 'axes.titlesize':'x-large', 'xtick.labelsize':'x-large', 'ytick.labelsize':'x-large'}
plt.rcParams.update(params)
fig = plt.figure(figsize=(13,10))
for numgraph in range(c.shape[0]):
    finalgraph=0
    if numgraph == c.shape[0] - 1:
        finalgraph=1
    ax1 = plt.subplot(c.shape[0]+1, 1, numgraph+1)
    if numgraph == 0:
        ax1.set_title('Retroactive neuromodulation')
    # One-hot raster: row 0 = "no cue", rows 1-4 = cues, row 5 = response cue.
    z = np.zeros((6, c[numgraph].size))
    for nn in range(c[numgraph].size):
        z[np.int(c[numgraph][nn]+1), nn]=1  # NOTE(review): np.int was removed in NumPy >= 1.24; plain int() needed on newer NumPy
    if finalgraph:
        ax1.set_xlabel('Timestep')
    ax1.set_xlim(-.5,120.5)
    ax1.set_ylim(-.5,5.5)
    ax1.imshow(1-z, cmap='gray',clim=(-1,1), aspect='auto')
    ax1.set_yticks([0,1,2,3,4,5])
    ax1.set_yticklabels(labels=["No cue", "Cue 1", "Cue 2", "Cue 3", "Cue 4", "Response cue"])
    # Second y-axis: modulator and reward traces share the x-axis with the cue raster.
    ax2 = ax1.twinx()
    ax2.set_ylim(-1,1)
    ax2.plot(m[numgraph], label="Modulator", lw=2)
    ax2.plot(r[numgraph], label="Reward", lw=2)
    ax2.plot(np.zeros_like(r[numgraph]), 'k:')
    if finalgraph:
        ax2.legend(loc='upper left', bbox_to_anchor=(0, -.2))

plt.tight_layout() # Too tight!
#fig.subplots_adjust(hspace=0.5)
plt.show()
================================================ FILE: sr/plotresults.py ================================================
# Exploratory results viewer: plots smoothed median loss/reward curves (with
# interquartile bands) for every experiment group matched by the active glob below.
import numpy as np
import glob
import matplotlib.pyplot as plt
import scipy
from scipy import stats

colorz = ['r', 'b', 'g', 'c', 'm', 'y', 'orange', 'k']
#groupnames = glob.glob('./tmp/loss*CS*cs_10*is_0*lr_3*seed_0.txt') + glob.glob('./tmp/loss*CS*eplen_50*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*CS*cs_20*eplen_75*eps_1e-06*is_0*seed_0.txt') # Least bad; lr 1e-4: modul unstable, 3e-5: slow, modul even slower
#groupnames = glob.glob('./tmp/loss*CS*cs_20*eplen_75*eps_1e-06*gc*is_0*lr_0.00*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*gc_7.*seed_0.txt') # see gc 10, 7, 20. For a comparison of many gc's, look at modplast only.
#groupnames = glob.glob('./tmp/loss*ni_4*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*SRB*seed_0.txt')
groupnames = glob.glob('./tmp/loss*SRB*bent_0.1*cs_*gc_2.0*ni_4*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*SRB*ni_2*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*lvlB*ni_2*seed_0.txt') ; groupnames = [x for x in groupnames if not 'modul2' in x]
#groupnames = glob.glob('./tmp/loss*Rnd*ni_2*seed_0.txt') ; groupnames = [x for x in groupnames if not 'modul2' in x]
#groupnames = glob.glob('./tmp/loss*CS*cs_20*eps_1e-06*is_0*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*eps*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*NewAdam*addpw_*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*EASY*addpw_*ni_2*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*SGD*ni_2*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*ni_2*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*eplen_140*ni_2*seed_0.txt')
#groupnames = glob.glob('./tmp/loss*seed_0.txt')

# If you can only use 7 runs, smooth the losses within each run to obtain more reliable estimates of performance!
def mavg(x, N):
    # Moving average of x over a window of N points; output has len(x) - N + 1 entries.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

plt.ion()
#plt.figure(figsize=(5,4))  # Smaller figure = relative larger fonts
plt.figure()
allmedianls = []
alllosses = []
poscol = 0
maxminlen = 0       # Longest common run length across groups (used for x-tick range)
minminlen = 999999  # Shortest common run length across groups
for numgroup, groupname in enumerate(groupnames):
    if "batch" in groupname:
        continue
    #if "lstm" not in groupname:
    #    continue
    # Strip the trailing '_0.txt' and re-glob to collect all random seeds of this group.
    g = groupname[:-6]+"*"
    print("====", groupname)
    fnames = glob.glob(g)
    fulllosses=[]
    losses=[]
    lgts=[]
    for fn in fnames:
        if "COPY" in fn:
            continue
        if False:
            #if "seed_4" in fn:
            #    continue
            #if "seed_7" in fn:
            #    continue
            if "seed_3" in fn:
                continue
            #if "seed_9" in fn:
            #    continue
            #if "seed_10" in fn:
            #    continue
            if "seed_11" in fn:
                continue
            if "seed_12" in fn:
                continue
            if "seed_13" in fn:
                continue
            if "seed_14" in fn:
                continue
            if "seed_15" in fn:
                continue
        z = np.loadtxt(fn)
        #z = mavg(z, 10) # For each run, we average the losses over K successive episodes
        z = z[::10] # Decimation - speed things up!
        z = z[:1800]
        print(fn, len(z))
        if False:
            if len(z) < 300:
                print(fn, len(z))
                continue
        lgts.append(len(z))
        fulllosses.append(z)
    # Truncate all runs in this group to the shortest one so they stack into a 2-D array.
    minlen = min(lgts)
    if minlen > maxminlen:
        maxminlen = minlen
    if minlen < minminlen:
        minminlen = minlen
    print("Minlen:", minlen)
    #if minlen < 1000:
    #    continue
    for z in fulllosses:
        losses.append(z[:minlen])
    losses = np.array(losses)
    alllosses.append(losses)
    meanl = np.mean(losses, axis=0)
    stdl = np.std(losses, axis=0)
    cil = stdl / np.sqrt(losses.shape[0]) * 1.96 # 95% confidence interval - assuming normality
    #cil = stdl / np.sqrt(losses.shape[0]) * 2.5 # 95% confidence interval - approximated with the t-distribution for 7 d.f.
    medianl = np.median(losses, axis=0)
    allmedianls.append(medianl)
    q1l = np.percentile(losses, 25, axis=0)
    q3l = np.percentile(losses, 75, axis=0)
    highl = np.max(losses, axis=0)
    lowl = np.min(losses, axis=0)
    #highl = meanl+stdl
    #lowl = meanl-stdl
    xx = range(len(meanl))
    # xticks and labels
    xt = range(0, maxminlen, 500)
    #xt = range(0, len(meanl), 100)
    #xt = range(0, len(meanl), 1000)
    #xt = range(0, 10001, 2000)
    xtl = [str(10 * 10 * i) for i in xt] # Because of decimation above, and only every 10th loss is recorded in the files
    #plt.plot(mavg(meanl, 100), label=g) #, color='blue')
    #plt.fill_between(xx, lowl, highl, alpha=.2)
    #plt.fill_between(xx, q1l, q3l, alpha=.1)
    #plt.plot(meanl) #, color='blue')
    ####plt.plot(mavg(medianl, 100), label=g) #, color='blue') # mavg changes the number of points !
    #plt.plot(mavg(q1l, 100), label=g, alpha=.3) #, color='blue')
    #plt.plot(mavg(q3l, 100), label=g, alpha=.3) #, color='blue')
    #plt.fill_between(xx, q1l, q3l, alpha=.2)
    #plt.plot(medianl, label=g) #, color='blue')
    AVGSIZE = 10 # 20
    xlen = len(mavg(q1l, AVGSIZE))
    #mylabel = g[g.find('type'):]
    mylabel = g
    # Dashed lines past the 8th group so colors can be reused without ambiguity.
    if numgroup < 8:
        zestyle = '-'
    else:
        zestyle = '--'
    zew=2
    #if 'tanh' in g:
    #    zew = 3
    #elif 'sig' in g:
    #    zew = 1
    #if 'pw_3' in g:
    #    zew = 3
    #elif 'pw_2' in g:
    #    zew = 1
    #else:
    #    raise ValueError("Which width?")
    plt.plot(mavg(medianl, AVGSIZE), label=mylabel, color=colorz[poscol % len(colorz)], ls=zestyle, lw=zew) # mavg changes the number of points !
    plt.fill_between( range(xlen), mavg(q1l, AVGSIZE), mavg(q3l, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)])
    #xlen = len(mavg(meanl, AVGSIZE))
    #plt.plot(mavg(meanl, AVGSIZE), label=g, color=colorz[poscol % len(colorz)]) # mavg changes the number of points !
#plt.fill_between( range(xlen), mavg(meanl - cil, AVGSIZE), mavg(meanl + cil, AVGSIZE), alpha=.2, color=colorz[poscol % len(colorz)]) poscol += 1 #plt.fill_between( range(xlen), mavg(lowl, 100), mavg(highl, 100), alpha=.2, color=colorz[numgroup % len(colorz)]) #plt.plot(mavg(losses[0], 1000), label=g, color=colorz[numgroup % len(colorz)]) #for curve in losses[1:]: # plt.plot(mavg(curve, 1000), color=colorz[numgroup % len(colorz)]) ps = [] # Adapt for varying lengths across groups #for n in range(0, alllosses[0].shape[1], 3): #for n in range(0, minminlen): # ps.append(scipy.stats.ranksums(alllosses[0][:,n], alllosses[1][:,n]).pvalue) #ps = np.array(ps) plt.legend(loc='best', fontsize=6) #plt.xlabel('Loss (sum square diff. b/w final output and target)') plt.xlabel('Number of Episodes') plt.ylabel('Loss') plt.xticks(xt, xtl) #plt.tight_layout() ================================================ FILE: sr/request.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_9_21_9_55_16", "name":"Exp3lvlCS5_gc5_15runs_bent0.03_bv0.1_hs200_rew1_wp0_A3C_clamp0_eplen120_addpw3_ni4_l20_modplast_datanh_fm1_pf0_lr1e-4_cs10_eps1e-6_is0_NFS", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 sr.py --eplen 120 --hs 200 --rule hebb --lr 1e-4 --l2 0 --addpw 3 --pe 1000 --bv 0.1 --bent 0.03 --rew 1 --wp 0 --save_every 5000 --type modplast --da tanh --clamp 0 --nbiter 200000 --fm 1 --ni 4 --pf .0 --alg A3C --cs 10 --eps 1e-6 --is 0 --gc 5.0 --rngseed {{mesos.instance}}", "ramMB":6000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": 
"/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: sr/request_batch.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_10_9_15_13_17", "name":"ExpSRbatch6_gc2.0_10runs_bent0.1_bv0.1_hs200_rew1_wp0_A3C_clamp0_eplen120_ni4_l20_modul_datanh_fm1_pf0_lr1e-4_cs20_eps1e-6_is0_NFS", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 srbatch.py --eplen 120 --hs 200 --lr 1e-4 --l2 0 --pe 500 --bv 0.1 --bent 0.1 --rew 1 --wp 0 --save_every 2000 --type modul --da tanh --clamp 0 --nbiter 200000 --fm 1 --ni 4 --pf .0 --alg A3C --cs 20 --eps 1e-6 --is 0 --bs 30 --gc 2.0 --rngseed {{mesos.instance}}", "ramMB":6000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: sr/request_easy.json ================================================ { "dockerImage":"tmiconi_rl", "tag":"master-test-2018_10_4_11_45_47", 
"name":"ExpSRbatch3_gc2.5_10runs_bent0.1_bv0.1_hs200_rew1_wp0_A3C_clamp0_eplen75_ni2_l20_plastic_datanh_fm1_pf0_lr1e-4_cs2_eps1e-6_is0_NFS", "cpus":2.0, "cmdLine":"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \u0026\u0026 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/nvidia/bin:/opt/hadoop/latest/bin \u0026\u0026 export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64/ \u0026\u0026 export LC_ALL=C.UTF-8 \u0026\u0026 export LANG=C.UTF-8 \u0026\u0026 cd /home/work/ \u0026\u0026 python3 srbatch.py --eplen 75 --hs 200 --lr 1e-4 --l2 0 --pe 500 --bv 0.1 --bent 0.1 --rew 1 --wp 0 --save_every 2000 --type plastic --da tanh --clamp 0 --nbiter 100000 --fm 1 --ni 2 --pf .0 --alg A3C --cs 2 --eps 1e-6 --is 0 --bs 30 --gc 2.5 --rngseed {{mesos.instance}}", "ramMB":6000, "gpus":1, "diskMB":6000, "cluster":"opusprodda1", "environment":"devel", "user":"tmiconi", "resourcePool": "/ailabs/p1/tmiconi", "instances":10, "isService":false, "cronSchedule":"", "custom":{}, "application":"testversion", "maxRetries":1, "constraints":{"sku":"1080ti"}, "accessTypes":[], "dependencies":[], "cronCollisionPolicy":"CANCEL_NEW", "emailOnFail":[], "emailOnSucceed":[] } ================================================ FILE: sr/srbatch.py ================================================ # Stimulus-response task as described in Miconi et al. ICLR 2019. # Copyright (c) 2018-2019 Uber Technologies, Inc. # # Licensed under the Uber Non-Commercial License (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at the root directory of this project. 
# srbatch.py — batched A3C meta-training loop for the stimulus-response task.
import argparse
import pdb
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
from numpy import random
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
import random
import sys
import pickle
import time
import os
import platform
#import makemaze
import numpy as np
#import matplotlib.pyplot as plt
import glob

import modul  # The code for the actual backpropamine network

np.set_printoptions(precision=4)

ADDINPUT = 4 # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs

def train(paramdict):
    """Meta-train a (plastic / neuromodulated) RNN on the batched stimulus-response task.

    paramdict: dict of hyperparameters (see the argparse definitions under
    __main__ for the meaning of each key).

    Runs an A3C-style actor-critic loop with Adam, periodically printing
    progress and saving the reward curve, model weights and params to local
    files (and copying them to NFS when the share is mounted). Requires CUDA.
    """
    #params = dict(click.get_current_context().params)
    #params['inputsize'] = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS
    print("Starting training...")
    params = {}
    #params.update(defaultParams)
    params.update(paramdict)
    print("Passed params: ", params)
    print(platform.uname())
    #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode
    suffix = "SRB_"+"".join([str(x)+"_" if pair[0] != 'pe' and pair[0] != 'nbsteps' and pair[0] != 'rngseed' and pair[0] != 'save_every' and pair[0] != 'test_every' else '' for pair in sorted(zip(params.keys(), params.values()), key=lambda x:x[0] ) for x in pair])[:-1] + "_rngseed_" + str(params['rngseed']) # Turning the parameters into a nice suffix for filenames
    print(suffix)
    #NBINPUTBITS = params['ni'] + 1
    NBINPUTBITS = params['cs'] + 1 # The additional bit is for the response cue (i.e. the "Go" cue)
    params['outputsize'] = 2 # "response" and "no response"
    params['inputsize'] = NBINPUTBITS + params['outputsize'] + ADDINPUT # The total number of input bits is the size of inputs, plus the "response cue" input, plus the number of actions, plus the number of additional inputs

    # This doesn't work with our version of PyTorch
    #params['device'] = 'gpu'
    #device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu")

    BS = params['bs']  # batch size: number of episodes run in parallel

    # Initialize random seeds (first two redundant?)
    print("Setting random seeds")
    np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed'])
    #print(click.get_current_context().params)

    # Select the network architecture; all classes come from modul.py.
    print("Initializing network")
    if params['type'] == 'modul':
        net = modul.RetroModulRNN(params)
    elif params['type'] == 'modplast':
        net = modul.SimpleModulRNN(params)
    elif params['type'] == 'plastic':
        net = modul.PlasticRNN(params)
    elif params['type'] == 'rnn':
        net = modul.NonPlasticRNN(params)
    else:
        raise ValueError("Network type unknown or not yet implemented: "+params['type'])

    print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()])
    allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()]
    print ("Size (numel) of all optimized elements:", allsizes)
    print ("Total size (numel) of all optimized elements:", sum(allsizes))

    #total_loss = 0.0
    print("Initializing optimizer")
    #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr'], weight_decay=params['l2'])
    #optimizer = torch.optim.RMSprop(net.parameters(), lr=1.0*params['lr'], weight_decay=params['l2'])
    optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=params['eps'], weight_decay=params['l2'])
    #optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=1e-4, weight_decay=params['l2'])
    #optimizer = torch.optim.SGD(net.parameters(), lr=1.0*params['lr'])
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=params['gamma'], step_size=params['steplr'])

    # The commented block below is leftover maze-generation code from the
    # maze experiments this script was derived from; unused here.
    #LABSIZE = params['lsize']
    #lab = np.ones((LABSIZE, LABSIZE))
    #CTR = LABSIZE // 2

    # Simple cross maze
    #lab[CTR, 1:LABSIZE-1] = 0
    #lab[1:LABSIZE-1, CTR] = 0

    # Double-T maze
    #lab[CTR, 1:LABSIZE-1] = 0
    #lab[1:LABSIZE-1, 1] = 0
    #lab[1:LABSIZE-1, LABSIZE - 2] = 0

    # Grid maze
    #lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0)
    #for row in range(1, LABSIZE - 1):
    #    for col in range(1, LABSIZE - 1):
    #        if row % 2 == 0 and col % 2 == 0:
    #            lab[row, col] = 1
    #lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps localization by introducing a detectable irregularity in the center

    #LABSIZE = params['msize']
    #lab = np.ones((LABSIZE, LABSIZE))
    #CTR = LABSIZE // 2

    ## Grid maze
    #lab[1:LABSIZE-1, 1:LABSIZE-1].fill(0)
    #for row in range(1, LABSIZE - 1):
    #    for col in range(1, LABSIZE - 1):
    #        if row % 2 == 0 and col % 2 == 0:
    #            lab[row, col] = 1
    #lab[CTR,CTR] = 0 # Not strictly necessary, but perhaps helps localization by introducing a detectable irregularity in the center

    all_losses = []
    all_grad_norms = []
    all_losses_objective = []
    all_total_rewards = []
    all_losses_v = []
    lossbetweensaves = 0
    nowtime = time.time()
    #meanreward = np.zeros((LABSIZE, LABSIZE))
    meanreward = np.zeros(params['ni'])
    meanrewardT = np.zeros((params['ni'], params['eplen']))

    nbtrials = [0]*BS
    totalnbtrials = 0
    nbtrialswithcc = 0  # count of trials in which the rewarded cue appeared

    print("Starting episodes!")

    for numepisode in range(params['nbiter']):

        PRINTTRACE = 0
        #if (numepisode+1) % (1 + params['pe']) == 0:
        if (numepisode+1) % (params['pe']) == 0:
            PRINTTRACE = 1

        #lab = makemaze.genmaze(size=LABSIZE, nblines=4)
        #count = np.zeros((LABSIZE, LABSIZE))

        # # Select the reward location for this episode - not on a wall!
        # rposr = 0; rposc = 0
        # while lab[rposr, rposc] == 1:
        #     rposr = np.random.randint(1, LABSIZE - 1)
        #     rposc = np.random.randint(1, LABSIZE - 1)

        # # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp'])
        # posc = CTR
        # posr = CTR

        # Fresh episode: zero the gradients, losses, hidden / plastic state.
        optimizer.zero_grad()
        loss = 0
        lossv = 0
        hidden = net.initialZeroState()
        if params['type'] != 'rnn':
            hebb = net.initialZeroHebb()
        if params['type'] == 'modul':
            et = net.initialZeroHebb() # Eligibility Trace is identical to Hebbian Trace in shape
            pw = net.initialZeroPlasticWeights()
        numactionchosen = 0

        # Generate the cues. Make sure they're all different (important when using very small cues for debugging, e.g. cs=2, ni=2)
        # Each cue is a random vector of cs bits in {-1, +1}, drawn per batch element.
        cuedata=[]
        for nb in range(BS):
            cuedata.append([])
            for ncue in range(params['ni']):
                assert len(cuedata[nb]) == ncue
                foundsame = 1
                cpt = 0
                while foundsame > 0 :
                    cpt += 1
                    if cpt > 10000: # This should only occur with very weird parameters, e.g. cs=2, ni>4
                        raise ValueError("Could not generate a full list of different cues")
                    foundsame = 0
                    candidate = np.random.randint(2, size=params['cs']) * 2 - 1
                    for backtrace in range(ncue):
                        if np.array_equal(cuedata[nb][backtrace], candidate):
                            foundsame = 1
                cuedata[nb].append(candidate)

        # Per-batch-element episode bookkeeping.
        reward = np.zeros(BS)
        sumreward = np.zeros(BS)
        rewards = []
        vs = []
        logprobs = []
        cues=[]
        for nb in range(BS):
            cues.append([])
        dist = 0
        numactionschosen = np.zeros(BS, dtype='int32')

        #reward = 0.0
        #rewards = []
        #vs = []
        #logprobs = []
        #sumreward = 0.0
        nbtrials = np.zeros(BS)
        nbrewardabletrials = np.zeros(BS)
        thistrialhascorrectcue = np.zeros(BS)
        triallength = np.zeros(BS, dtype='int32')
        correctcue = np.random.randint(params['ni'], size=BS)  # the rewarded cue for each batch element, fixed for the episode
        trialstep = np.zeros(BS, dtype='int32')

        #print("EPISODE ", numepisode)
        for numstep in range(params['eplen']):

            #if params['clamp'] == 0:
            inputs = np.zeros((BS, params['inputsize']), dtype='float32')
            #else:
            #    inputs = np.zeros((1, params['hs']), dtype='float32')

            # Build this step's input vector for every batch element.
            for nb in range(BS):
                if trialstep[nb] == 0:
                    # Start of a new trial for this batch element: draw its length and cue sequence.
                    thistrialhascorrectcue[nb] = 0
                    # Trial length is randomly modulated for each trial; first time step always -1 (i.e. no input cue), last time step always response-cue (i.e. NBINPUTBITS-1).
                    #triallength = params['ni'] // 2 + 3 + np.random.randint(1 + params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps
                    triallength[nb] = params['ni'] // 2 + 3 + np.random.randint(params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps
                    # In any trial, we only show half the cues (randomly chosen), once each:
                    mycues = [x for x in range(params['ni'])]
                    random.shuffle(mycues); mycues = mycues[:len(mycues) // 2]
                    # The rest is filled with no-input time steps (i.e. cue = -1), but also with the 3 fixed-cue steps (1st, last, next-to-last)
                    for nc in range(triallength[nb] - 3 - len(mycues)):
                        mycues.append(-1)
                    random.shuffle(mycues)
                    mycues.insert(0, -1); mycues.append(params['ni']); mycues.append(-1) # The first and last time step have no input (cue -1), the next-to-last has the response cue.
                    assert(len(mycues) == triallength[nb])
                    cues[nb] = mycues

                inputs[nb, :NBINPUTBITS] = 0
                if cues[nb][trialstep[nb]] > -1 and cues[nb][trialstep[nb]] < params['ni']:
                    #inputs[0, cues[trialstep]] = 1.0
                    inputs[nb, :NBINPUTBITS-1] = cuedata[nb][cues[nb][trialstep[nb]]][:]
                    if cues[nb][trialstep[nb]] == correctcue[nb]:
                        thistrialhascorrectcue[nb] = 1
                if cues[nb][trialstep[nb]] == params['ni']:
                    inputs[nb, NBINPUTBITS-1] = 1 # "Go" cue

                inputs[nb, NBINPUTBITS + 0] = 1.0 # Bias neuron, probably not necessary
                inputs[nb,NBINPUTBITS + 1] = numstep / params['eplen']
                inputs[nb, NBINPUTBITS + 2] = 1.0 * reward[nb] # Reward from previous time step
                if numstep > 0:
                    inputs[nb, NBINPUTBITS + ADDINPUT + numactionschosen[nb]] = 1 # Previously chosen action

            inputsC = torch.from_numpy(inputs).cuda()

            # Might be better:
            #if rposr == posr and rposc = posc:
            #    inputs[0][-4] = 100.0
            #else:
            #    inputs[0][-4] = 0

            # Running the network
            ## Running the network
            if params['type'] == 'modplast':
                y, v, DAout, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas
            elif params['type'] == 'modul':
                y, v, DAout, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y should output raw scores, not probas
            elif params['type'] == 'plastic':
                y, v, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas
            elif params['type'] == 'rnn':
                y, v, hidden = net(Variable(inputsC, requires_grad=False), hidden) # y should output raw scores, not probas
            else:
                raise ValueError("Network type unknown or not yet implemented!")

            y = F.softmax(y, dim=1) # Must convert y to probas to use this !
            # Sample one action per batch element from the policy.
            distrib = torch.distributions.Categorical(y)
            actionschosen = distrib.sample()
            logprobs.append(distrib.log_prob(actionschosen))
            numactionschosen = actionschosen.data.cpu().numpy() # Turn to scalar

            if PRINTTRACE:
                print("Step ", numstep, " Inputs (1st in batch): ", inputs[0,:params['inputsize']], " - Outputs(0): ", y.data.cpu().numpy()[0,:], " - action chosen(0): ", numactionschosen[0], "TrialLen(0):", triallength[0], "trialstep(0):", trialstep[0], "TTHCC(0): ", thistrialhascorrectcue[0], " -Reward (previous step): ", reward[0], ", cues(0):", cues[0], ", cc(0):", correctcue[0])
                #print("Step ", numstep, " Inputs: ", inputs[0,:params['inputsize']], " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen,
                #    " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), "TrialLen:", triallength, "trialstep:", trialstep, "TTHCC: ", thistrialhascorrectcue, " -Reward (previous step): ", reward, ", cues:", cues, ", cc:", correctcue)

            # Compute this step's reward and advance each batch element's trial clock.
            reward = np.zeros(BS, dtype='float32')
            for nb in range(BS):
                if numactionschosen[nb] == 1:
                    # Small penalty for any non-rest action taken
                    reward[nb] -= params['wp']

                ### DEBUGGING
                ## Easiest possible episode-dependent response (i.e. the easiest
                ## possible problem that actually requires meta-learning, with ni=2)
                ## This one works pretty well... But harder ones don't work well!
                #if numactionchosen == correctcue :
                #    reward = params['rew']
                #else:
                #    reward = -params['rew']

                trialstep[nb] += 1
                if trialstep[nb] == triallength[nb] - 1:
                    # This was the next-to-last step of the trial (and we showed the response signal, unless it was the first few steps in episode).
                    assert(cues[nb][trialstep[nb] - 1] == params['ni'] or numstep < 2)
                    # We must deliver reward (which will be perceived by the agent at the next step), positive or negative, depending on response
                    if thistrialhascorrectcue[nb] and numactionschosen[nb] == 1:
                        reward[nb] += params['rew']
                    elif (not thistrialhascorrectcue[nb]) and numactionschosen[nb] == 0:
                        reward[nb] += params['rew']
                    else:
                        reward[nb] -= params['rew']
                    # With probability pf, flip the reward sign (noise injection).
                    if np.random.rand() < params['pf']:
                        reward[nb] = -reward[nb]

                if trialstep[nb] == triallength[nb]:
                    # This was the last step of the trial (and we showed no input)
                    assert(cues[nb][trialstep[nb] - 1] == -1 or numstep < 2)
                    nbtrials[nb] += 1
                    totalnbtrials += 1
                    if thistrialhascorrectcue[nb]:
                        nbtrialswithcc += 1
                    #nbrewardabletrials += 1
                    # Trial is dead, long live trial
                    trialstep[nb] = 0
                    # We initialize the hidden state between trials!
                    #if params['is'] == 1:
                    #    hidden = net.initialZeroState()

            rewards.append(reward)
            vs.append(v)
            sumreward += reward

            #if params['alg'] in ['A3C' , 'REIE' , 'REIT']:
            loss += (params['bent'] * y.pow(2).sum() / BS ) # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution, so we use this instead.

            ##if PRINTTRACE:
            ##    print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward)

        # Episode finished: accumulate the A3C actor-critic loss backwards
        # through time (discounted return R vs. the critic's value estimates).
        R = Variable(torch.zeros(BS).cuda(), requires_grad=False)
        gammaR = params['gr']
        for numstepb in reversed(range(params['eplen'])) :
            R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False)
            ctrR = R - vs[numstepb][0]
            lossv += ctrR.pow(2).sum() / BS
            loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BS  # Need to check if detach() is OK
        #pdb.set_trace()

        # Episode is done, now let's do the actual computations
        #gammaR = params['gr']
        #if params['alg'] == 'A3C':
        #    R = 0
        #    for numstepb in reversed(range(params['eplen'])) :
        #        R = gammaR * R + rewards[numstepb]
        #        lossv += (vs[numstepb][0] - R).pow(2)
        #        loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0])  # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows
        #    loss += params['bv'] * lossv
        #elif params['alg'] in ['REI', 'REIE']:
        #    R = sumreward
        #    baseline = meanreward[correctcue]
        #    for numstepb in reversed(range(params['eplen'])) :
        #        loss -= logprobs[numstepb] * (R - baseline)
        #elif params['alg'] == 'REIT':
        #    R = 0
        #    for numstepb in reversed(range(params['eplen'])) :
        #        R = gammaR * R + rewards[numstepb]
        #        loss -= logprobs[numstepb] * (R - meanrewardT[correctcue, numstepb])
        #else:
        #    raise ValueError("Must select algo type")
        #elif params['alg'] == 'REINOB':
        #    R = sumreward
        #    for numstepb in reversed(range(params['eplen'])) :
        #        loss -= logprobs[numstepb] * R
        #elif params['alg'] == 'REITMP':
        #    R = 0
        #    for numstepb in reversed(range(params['eplen'])) :
        #        R = gammaR * R + rewards[numstepb]
        #        loss -= logprobs[numstepb] * R
        #else:
        #    raise ValueError("Which algo?")

        #meanreward[correctcue] = (1.0 - params['nu']) * meanreward[correctcue] + params['nu'] * sumreward
        ##meanreward[rposr, rposc] = (1.0 - params['nu']) * meanreward[rposr, rposc] + params['nu'] * sumreward
        #R = 0
        #for numstepb in reversed(range(params['eplen'])) :
        #    R = gammaR * R + rewards[numstepb]
        #    meanrewardT[correctcue, numstepb] = (1.0 - params['nu']) * meanrewardT[correctcue, numstepb] + params['nu'] * R

        loss += params['blossv'] * lossv
        loss /= params['eplen']

        if PRINTTRACE:
            #if params['alg'] == 'A3C':
            print("lossv: ", float(lossv))
            #elif params['alg'] in ['REI', 'REIE', 'REIT']:
            #    print("meanreward baselines: ", [meanreward[x] for x in range(params['ni'])])
            print ("Total reward for this episode(0):", sumreward[0], "Prop. of trials w/ rewarded cue:", (nbtrialswithcc / totalnbtrials))
            print("Nb trials for this episode(0):", nbtrials[0], "[2]:",nbtrials[2]," Total Nb of trials:", totalnbtrials)

        #if params['squash'] == 1:
        #    if sumreward < 0:
        #        sumreward = -np.sqrt(-sumreward)
        #    else:
        #        sumreward = np.sqrt(sumreward)
        #elif params['squash'] == 0:
        #    pass
        #else:
        #    raise ValueError("Incorrect value for squash parameter")

        #loss *= sumreward

        #for p in net.parameters():
        #    p.grad.data.clamp_(-params['clamp'], params['clamp'])
        loss.backward()
        # NOTE(review): clip_grad_norm was renamed clip_grad_norm_ in later
        # PyTorch versions — this call targets the older API used here.
        all_grad_norms.append(torch.nn.utils.clip_grad_norm(net.parameters(), params['gc']))
        if numepisode > 100:  # Burn-in period for meanreward
            optimizer.step()

        #print(sumreward)
        lossnum = float(loss)
        lossbetweensaves += lossnum
        all_losses_objective.append(lossnum)
        all_total_rewards.append(sumreward.mean())
        #all_total_rewards.append(sumreward[0])
        #all_losses_v.append(lossv.data[0])
        #total_loss += lossnum

        # Periodic progress report every 'pe' episodes.
        if (numepisode+1) % params['pe'] == 0:
            print(numepisode, "====")
            print("Mean loss: ", lossbetweensaves / params['pe'])
            lossbetweensaves = 0
            print("Mean reward: ", np.sum(all_total_rewards[-params['pe']:])/ params['pe'])
            previoustime = nowtime
            nowtime = time.time()
            print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime)
            if params['type'] == 'plastic' or params['type'] == 'lstmplastic':
                print("ETA: ", float(net.eta), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] )
            elif params['type'] == 'modul' or params['type'] == 'modul2':
                print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy(), " mean-abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())))
            elif params['type'] == 'rnn':
                print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] )

        # Periodic checkpoint every 'save_every' episodes.
        if (numepisode+1) % params['save_every'] == 0:
            print("Saving files...")
            # lossbetweensaves /= params['save_every']
            # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves)
            # print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:]))
            losslast100 = np.mean(all_losses_objective[-100:])
            print("Average loss over the last 100 episodes:", losslast100)
            # # Instability detection; necessary for SELUs, which seem to be divergence-prone
            # # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us.
            # if losslast100 > 2 * lossbetweensavesprev:
            #     print("We have diverged ! Restoring last savepoint!")
            #     net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt'))
            # else:
            print("Saving local files...")
            #with open('params_'+suffix+'.dat', 'wb') as fo:
            #    #pickle.dump(net.w.data.cpu().numpy(), fo)
            #    #pickle.dump(net.alpha.data.cpu().numpy(), fo)
            #    #pickle.dump(net.eta.data.cpu().numpy(), fo)
            #    #pickle.dump(all_losses, fo)
            #    pickle.dump(params, fo)
            #with open('loss_'+suffix+'.txt', 'w') as thefile:
            #    for item in all_losses_objective:
            #        thefile.write("%s\n" % item)
            #with open('lossv_'+suffix+'.txt', 'w') as thefile:
            #    for item in all_losses_v:
            #        thefile.write("%s\n" % item)
            #with open('grads_'+suffix+'.txt', 'w') as thefile:
            #    for item in all_grad_norms[::10]:
            #        thefile.write("%s\n" % item)
            # NB: despite the filename, this records the (decimated) reward curve.
            with open('loss_'+suffix+'.txt', 'w') as thefile:
                for item in all_total_rewards[::10]:
                    thefile.write("%s\n" % item)
            torch.save(net.state_dict(), 'torchmodel_'+suffix+'.dat')
            with open('params_'+suffix+'.dat', 'wb') as fo:
                pickle.dump(params, fo)
            print("Saving HDFS files...")
            if os.path.isdir('/mnt/share/tmiconi'):
                print("Transferring to NFS storage...")
                for fn in ['params_'+suffix+'.dat', 'loss_'+suffix+'.txt', 'torchmodel_'+suffix+'.dat']:
                    result = os.system(
                        'cp {} {}'.format(fn, '/mnt/share/tmiconi/3level/'+fn))
                print("Done!")
            # lossbetweensavesprev = lossbetweensaves
            # lossbetweensaves = 0
            # sys.stdout.flush()
            # sys.stderr.flush()

if __name__ == "__main__":
    #defaultParams = {
    #    'type' : 'lstm',
    #    'seqlen' : 200,
    #    'hs': 500,
    #    'activ': 'tanh',
    #    'steplr': 10e9, # By default, no change in the learning rate
    #    'gamma': .5, # The annealing factor of learning rate decay for Adam
    #    'imagesize': 31,
    #    'nbiter': 30000,
    #    'lr': 1e-4,
    #    'test_every': 10,
    #    'save_every': 3000,
    #    'rngseed':0
    #}
    parser = argparse.ArgumentParser()
    parser.add_argument("--rngseed", type=int, help="random seed", default=0)
    #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0)
    #parser.add_argument("--wp", type=float, help="wall penalty (reward decrement for hitting a wall)", default=0.1)
    parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0)
    parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.0)
    #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2)
    #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0)
    parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03)
    parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0)
    parser.add_argument("--bv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7)
    #parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=0)
    #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0)
    #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0)
    #parser.add_argument("--nbarms", type=int, help="number of arms", default=2)
    #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3)
    #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh')
    parser.add_argument("--alg", help="meta-learning algorithm (A3C or REI or REIE or REIT)", default='REIT')
    parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb')
    parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='modul')
    #parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9)
    parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh')
    parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9)
    parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4)
    parser.add_argument("--fm", type=int, help="if using neuromodulation, do we modulate the whole network (1) or just half (0) ?", default=1)
    #parser.add_argument("--na", type=int, help="number of actions (excluding \"rest\" action)", default=2)
    parser.add_argument("--ni", type=int, help="number of different inputs", default=2)
    parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1)
    #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0)
    #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4)
    #parser.add_argument("--modhalf", type=int, help="which half of the recurrent netowkr receives modulation (1 or 2)", default=1)
    #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4)
    #parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1)
    parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=2)
    parser.add_argument("--clamp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0)
    parser.add_argument("--eplen", type=int, help="length of episodes", default=100)
    #parser.add_argument("--exptime", type=int, help="exploration (no reward) time (must be < eplen)", default=0)
    parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100)
    parser.add_argument("--is", type=int, help="do we initialize hidden state after each trial (1) or not (0) ?", default=0)
    parser.add_argument("--cs", type=int, help="cue size - number of bits for each cue", default=10)
    parser.add_argument("--pf", type=float, help="probability of flipping the reward (.5 = pure noise)", default=0)
    parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=1e-5)
    parser.add_argument("--bs", type=int, help="batch size", default=1)
    parser.add_argument("--gc", type=float, help="gradient clipping", default=1000.0)
    parser.add_argument("--eps", type=float, help="epsilon for Adam optimizer", default=1e-6)
    #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000)
    #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3)
    parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000)
    parser.add_argument("--save_every", type=int,
help="number of cycles between successive save points", default=200) parser.add_argument("--pe", type=int, help="'print every', number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: sr/srrun.py ================================================ import argparse import pdb import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import time import os import platform #import makemaze import numpy as np #import matplotlib.pyplot as plt import glob import modul np.set_printoptions(precision=4) ADDINPUT = 4 # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs def train(paramdict): #params = dict(click.get_current_context().params) #params['inputsize'] = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS suffix = 'SRB_addpw_2_alg_A3C_bent_0.1_blossv_0.1_bs_30_bv_0.1_clamp_0_cs_20_da_tanh_eplen_120_eps_1e-06_fm_1_gc_2.0_gr_0.9_hs_200_is_0_l2_0.0_lr_0.0001_nbiter_200000_ni_4_nu_0.1_pf_0.0_rew_1.0_rule_hebb_type_modul_wp_0.0_rngseed_11' print("Starting training...") params = {} #params.update(defaultParams) params.update(paramdict) with open('./params_'+suffix+'.dat', 'rb') as fo: params = pickle.load(fo) params['bs'] = 1 print("Used params: ", params) print(platform.uname()) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode #NBINPUTBITS = params['ni'] + 1 NBINPUTBITS = params['cs'] + 1 # The additional bit is for the response cue (i.e. 
the "Go" cue) params['outputsize'] = 2 # "response" and "no response" params['inputsize'] = NBINPUTBITS + params['outputsize'] + ADDINPUT # The total number of input bits is the size of inputs, plus the "response cue" input, plus the number of actions, plus the number of additional inputs # This doesn't work with our version of PyTorch #params['device'] = 'gpu' #device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu") BS = params['bs'] # Initialize random seeds (first two redundant?) print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") if params['type'] == 'modul': net = modul.RetroModulRNN(params) elif params['type'] == 'modplast': net = modul.SimpleModulRNN(params) elif params['type'] == 'plastic': net = modul.PlasticRNN(params) elif params['type'] == 'rnn': net = modul.NonPlasticRNN(params) else: raise ValueError("Network type unknown or not yet implemented: "+params['type']) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") #optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=params['eps'], weight_decay=params['l2']) all_losses = [] all_grad_norms = [] all_losses_objective = [] all_total_rewards = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() #meanreward = np.zeros((LABSIZE, LABSIZE)) meanreward = np.zeros(params['ni']) meanrewardT = np.zeros((params['ni'], params['eplen'])) nbtrials = [0]*BS totalnbtrials = 0 nbtrialswithcc = 0 print("Starting episodes!") for numepisode in range(params['nbiter']): PRINTTRACE = 0 #if (numepisode+1) % (1 + params['pe']) == 0: if (numepisode+1) % 
(params['pe']) == 0: PRINTTRACE = 1 #lab = makemaze.genmaze(size=LABSIZE, nblines=4) #count = np.zeros((LABSIZE, LABSIZE)) # # Select the reward location for this episode - not on a wall! # rposr = 0; rposc = 0 # while lab[rposr, rposc] == 1: # rposr = np.random.randint(1, LABSIZE - 1) # rposc = np.random.randint(1, LABSIZE - 1) # # We always start the episode from the center (when hitting reward, we may teleport either to center or to a random location depending on params['rsp']) # posc = CTR # posr = CTR optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() if params['type'] != 'rnn': hebb = net.initialZeroHebb() if params['type'] == 'modul': et = net.initialZeroHebb() # Eligibility Trace is identical to Hebbian Trace in shape pw = net.initialZeroPlasticWeights() numactionchosen = 0 # Generate the cues. Make sure they're all different (important when using very small cues for debugging, e.g. cs=2, ni=2) cuedata=[] for nb in range(BS): cuedata.append([]) for ncue in range(params['ni']): assert len(cuedata[nb]) == ncue foundsame = 1 cpt = 0 while foundsame > 0 : cpt += 1 if cpt > 10000: # This should only occur with very weird parameters, e.g. 
cs=2, ni>4 raise ValueError("Could not generate a full list of different cues") foundsame = 0 candidate = np.random.randint(2, size=params['cs']) * 2 - 1 for backtrace in range(ncue): if np.array_equal(cuedata[nb][backtrace], candidate): foundsame = 1 cuedata[nb].append(candidate) reward = np.zeros(BS) sumreward = np.zeros(BS) rewards = [] vs = [] logprobs = [] cues=[] for nb in range(BS): cues.append([]) dist = 0 numactionschosen = np.zeros(BS, dtype='int32') #reward = 0.0 #rewards = [] #vs = [] #logprobs = [] #sumreward = 0.0 nbtrials = np.zeros(BS) nbrewardabletrials = np.zeros(BS) thistrialhascorrectcue = np.zeros(BS) triallength = np.zeros(BS, dtype='int32') correctcue = np.random.randint(params['ni'], size=BS) trialstep = np.zeros(BS, dtype='int32') #print("EPISODE ", numepisode) for numstep in range(params['eplen']): #if params['clamp'] == 0: inputs = np.zeros((BS, params['inputsize']), dtype='float32') #else: # inputs = np.zeros((1, params['hs']), dtype='float32') for nb in range(BS): if trialstep[nb] == 0: thistrialhascorrectcue[nb] = 0 # Trial length is randomly modulated for each trial; first time step always -1 (i.e. no input cue), last time step always response-cue (i.e. NBINPUTBITS-1). #triallength = params['ni'] // 2 + 3 + np.random.randint(1 + params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps triallength[nb] = params['ni'] // 2 + 3 + np.random.randint(params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps # In any trial, we only show half the cues (randomly chosen), once each: mycues = [x for x in range(params['ni'])] random.shuffle(mycues); mycues = mycues[:len(mycues) // 2] # The rest is filled with no-input time steps (i.e. 
cue = -1), but also with the 3 fixed-cue steps (1st, last, next-to-last) for nc in range(triallength[nb] - 3 - len(mycues)): mycues.append(-1) random.shuffle(mycues) mycues.insert(0, -1); mycues.append(params['ni']); mycues.append(-1) # The first and last time step have no input (cue -1), the next-to-last has the response cue. assert(len(mycues) == triallength[nb]) cues[nb] = mycues inputs[nb, :NBINPUTBITS] = 0 if cues[nb][trialstep[nb]] > -1 and cues[nb][trialstep[nb]] < params['ni']: #inputs[0, cues[trialstep]] = 1.0 inputs[nb, :NBINPUTBITS-1] = cuedata[nb][cues[nb][trialstep[nb]]][:] if cues[nb][trialstep[nb]] == correctcue[nb]: thistrialhascorrectcue[nb] = 1 if cues[nb][trialstep[nb]] == params['ni']: inputs[nb, NBINPUTBITS-1] = 1 # "Go" cue inputs[nb, NBINPUTBITS + 0] = 1.0 # Bias neuron, probably not necessary inputs[nb,NBINPUTBITS + 1] = numstep / params['eplen'] inputs[nb, NBINPUTBITS + 2] = 1.0 * reward[nb] # Reward from previous time step if numstep > 0: inputs[nb, NBINPUTBITS + ADDINPUT + numactionschosen[nb]] = 1 # Previously chosen action inputsC = torch.from_numpy(inputs).cuda() # Might be better: #if rposr == posr and rposc = posc: # inputs[0][-4] = 100.0 #else: # inputs[0][-4] = 0 # Running the network ## Running the network if params['type'] == 'modplast': y, v, DAout, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas elif params['type'] == 'modul': y, v, DAout, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y should output raw scores, not probas elif params['type'] == 'plastic': y, v, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas elif params['type'] == 'rnn': y, v, hidden = net(Variable(inputsC, requires_grad=False), hidden) # y should output raw scores, not probas else: raise ValueError("Network type unknown or not yet implemented!") y = F.softmax(y, dim=1) # Must convert y 
to probas to use this ! distrib = torch.distributions.Categorical(y) actionschosen = distrib.sample() logprobs.append(distrib.log_prob(actionschosen)) numactionschosen = actionschosen.data.cpu().numpy() # Turn to scalar if PRINTTRACE: print("Step ", numstep, " Inputs (1st in batch): ", inputs[0,:params['inputsize']], " - Outputs(0): ", y.data.cpu().numpy()[0,:], " - action chosen(0): ", numactionschosen[0], "TrialLen(0):", triallength[0], "trialstep(0):", trialstep[0], "TTHCC(0): ", thistrialhascorrectcue[0], " -Reward (previous step): ", reward[0], ", cues(0):", cues[0], ", cc(0):", correctcue[0]) #print("Step ", numstep, " Inputs: ", inputs[0,:params['inputsize']], " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen, # " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), "TrialLen:", triallength, "trialstep:", trialstep, "TTHCC: ", thistrialhascorrectcue, " -Reward (previous step): ", reward, ", cues:", cues, ", cc:", correctcue) reward = np.zeros(BS, dtype='float32') for nb in range(BS): if numactionschosen[nb] == 1: # Small penalty for any non-rest action taken reward[nb] -= params['wp'] ### DEBUGGING ## Easiest possible episode-dependent response (i.e. the easiest ## possible problem that actually require meta-learning, with ni=2) ## This one works pretty wel... But harder ones don't work well! #if numactionchosen == correctcue : # reward = params['rew'] #else: # reward = -params['rew'] trialstep[nb] += 1 if trialstep[nb] == triallength[nb] - 1: # This was the next-to-last step of the trial (and we showed the response signal, unless it was the first few steps in episode). 
assert(cues[nb][trialstep[nb] - 1] == params['ni'] or numstep < 2) # We must deliver reward (which will be perceived by the agent at the next step), positive or negative, depending on response if thistrialhascorrectcue[nb] and numactionschosen[nb] == 1: reward[nb] += params['rew'] elif (not thistrialhascorrectcue[nb]) and numactionschosen[nb] == 0: reward[nb] += params['rew'] else: reward[nb] -= params['rew'] if np.random.rand() < params['pf']: reward[nb] = -reward[nb] if trialstep[nb] == triallength[nb]: # This was the last step of the trial (and we showed no input) assert(cues[nb][trialstep[nb] - 1] == -1 or numstep < 2) nbtrials[nb] += 1 totalnbtrials += 1 if thistrialhascorrectcue[nb]: nbtrialswithcc += 1 #nbrewardabletrials += 1 # Trial is dead, long live trial trialstep[nb] = 0 # We initialize the hidden state between trials! #if params['is'] == 1: # hidden = net.initialZeroState() rewards.append(reward) vs.append(v) sumreward += reward #if params['alg'] in ['A3C' , 'REIE' , 'REIT']: loss += (params['bent'] * y.pow(2).sum() / BS ) # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution, so we use this instead. 
##if PRINTTRACE: ## print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward) R = Variable(torch.zeros(BS).cuda(), requires_grad=False) gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False) ctrR = R - vs[numstepb][0] lossv += ctrR.pow(2).sum() / BS loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BS # Need to check if detach() is OK #pdb.set_trace() # Episode is done, now let's do the actual computations #gammaR = params['gr'] #if params['alg'] == 'A3C': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # lossv += (vs[numstepb][0] - R).pow(2) # loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows # loss += params['bv'] * lossv #elif params['alg'] in ['REI', 'REIE']: # R = sumreward # baseline = meanreward[correctcue] # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * (R - baseline) #elif params['alg'] == 'REIT': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * (R - meanrewardT[correctcue, numstepb]) #else: # raise ValueError("Must select algo type") #elif params['alg'] == 'REINOB': # R = sumreward # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * R #elif params['alg'] == 'REITMP': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * R #else: # raise ValueError("Which algo?") #meanreward[correctcue] = (1.0 - params['nu']) * meanreward[correctcue] + params['nu'] * sumreward ##meanreward[rposr, rposc] = (1.0 - params['nu']) * meanreward[rposr, rposc] + params['nu'] * sumreward #R = 0 #for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + 
rewards[numstepb] # meanrewardT[correctcue, numstepb] = (1.0 - params['nu']) * meanrewardT[correctcue, numstepb] + params['nu'] * R loss += params['blossv'] * lossv loss /= params['eplen'] if PRINTTRACE: #if params['alg'] == 'A3C': print("lossv: ", float(lossv)) #elif params['alg'] in ['REI', 'REIE', 'REIT']: # print("meanreward baselines: ", [meanreward[x] for x in range(params['ni'])]) print ("Total reward for this episode(0):", sumreward[0], "Prop. of trials w/ rewarded cue:", (nbtrialswithcc / totalnbtrials)) print("Nb trials for this episode(0):", nbtrials[0], "[2]:",nbtrials[2]," Total Nb of trials:", totalnbtrials) #if params['squash'] == 1: # if sumreward < 0: # sumreward = -np.sqrt(-sumreward) # else: # sumreward = np.sqrt(sumreward) #elif params['squash'] == 0: # pass #else: # raise ValueError("Incorrect value for squash parameter") #loss *= sumreward #loss.backward() #all_grad_norms.append(torch.nn.utils.clip_grad_norm(net.parameters(), params['gc'])) #if numepisode > 100: # Burn-in period for meanreward # optimizer.step() #print(sumreward) lossnum = float(loss) lossbetweensaves += lossnum all_losses_objective.append(lossnum) all_total_rewards.append(sumreward.mean()) #all_total_rewards.append(sumreward[0]) #all_losses_v.append(lossv.data[0]) #total_loss += lossnum if (numepisode+1) % params['pe'] == 0: print(numepisode, "====") print("Mean loss: ", lossbetweensaves / params['pe']) lossbetweensaves = 0 print("Mean reward: ", np.sum(all_total_rewards[-params['pe']:])/ params['pe']) previoustime = nowtime nowtime = time.time() print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", float(net.eta), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'modul' or params['type'] == 'modul2': print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy(), " mean-abs pw: ", 
np.mean(np.abs(pw.data.cpu().numpy()))) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numepisode+1) % params['save_every'] == 0: print("Saving files...") # lossbetweensaves /= params['save_every'] # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) # print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # # Instability detection; necessary for SELUs, which seem to be divergence-prone # # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us. # if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) # else: print("NOT saving files!") # lossbetweensavesprev = lossbetweensaves # lossbetweensaves = 0 # sys.stdout.flush() # sys.stderr.flush() if __name__ == "__main__": #defaultParams = { # 'type' : 'lstm', # 'seqlen' : 200, # 'hs': 500, # 'activ': 'tanh', # 'steplr': 10e9, # By default, no change in the learning rate # 'gamma': .5, # The annealing factor of learning rate decay for Adam # 'imagesize': 31, # 'nbiter': 30000, # 'lr': 1e-4, # 'test_every': 10, # 'save_every': 3000, # 'rngseed':0 #} parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) #parser.add_argument("--wp", type=float, help="wall penalty (reward decrement for hitting a wall)", default=0.1) parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0) parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.0) 
    #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2)
    #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0)
    # Loss-shaping coefficients
    parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03)
    parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0)
    parser.add_argument("--bv", type=float, help="coefficient for value prediction loss", default=.1)
    #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7)
    #parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=0)
    #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0)
    #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0)
    #parser.add_argument("--nbarms", type=int, help="number of arms", default=2)
    #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3)
    #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh')
    # Algorithm / network-architecture selection
    parser.add_argument("--alg", help="meta-learning algorithm (A3C or REI or REIE or REIT)", default='REIT')
    parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb')
    parser.add_argument("--type", help="network type ('lstm' or 'rnn' or 'plastic')", default='modul')
    #parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9)
    parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh')
    # Optimization hyper-parameters
    parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9)
    parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4)
    parser.add_argument("--fm", type=int, help="if using neuromodulation, do we modulate the whole network (1) or just half (0) ?", default=1)
    #parser.add_argument("--na", type=int, help="number of actions (excluding \"rest\" action)", default=2)
    parser.add_argument("--ni", type=int, help="number of different inputs", default=2)
    parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1)
    #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0)
    #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4)
    #parser.add_argument("--modhalf", type=int, help="which half of the recurrent netowkr receives modulation (1 or 2)", default=1)
    #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4)
    #parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1)
    # Task / episode structure
    parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=2)
    parser.add_argument("--clamp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0)
    parser.add_argument("--eplen", type=int, help="length of episodes", default=100)
    #parser.add_argument("--exptime", type=int, help="exploration (no reward) time (must be < eplen)", default=0)
    parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100)
    parser.add_argument("--is", type=int, help="do we initialize hidden state after each trial (1) or not (0) ?", default=0)
    parser.add_argument("--cs", type=int, help="cue size - number of bits for each cue", default=10)
    parser.add_argument("--pf",
type=float, help="probability of flipping the reward (.5 = pure noise)", default=0) parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=1e-5) parser.add_argument("--bs", type=int, help="batch size", default=1) parser.add_argument("--gc", type=float, help="gradient clipping", default=1000.0) parser.add_argument("--eps", type=float, help="epsilon for Adam optimizer", default=1e-6) #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000) #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=200) parser.add_argument("--pe", type=int, help="'print every', number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict) ================================================ FILE: sr/srrun1episode.py ================================================ import argparse import pdb import torch import torch.nn as nn from torch.autograd import Variable import numpy as np from numpy import random import torch.nn.functional as F from torch import optim from torch.optim import lr_scheduler import random import sys import pickle import time import os import platform #import makemaze import numpy as np #import matplotlib.pyplot as plt import glob import modul np.set_printoptions(precision=4) ADDINPUT = 4 # 1 inputs for the previous reward, 1 inputs for numstep, 1 unused, 1 "Bias" inputs def train(paramdict): cuesshownall = []; rewardsprevstepall = []; modulatorall=[] for numrun in range(4): #params = 
dict(click.get_current_context().params) #params['inputsize'] = RFSIZE * RFSIZE + ADDINPUT + NBNONRESTACTIONS #suffix = 'SRB_addpw_2_alg_A3C_bent_0.1_blossv_0.1_bs_30_bv_0.1_clamp_0_cs_20_da_tanh_eplen_120_eps_1e-06_fm_1_gc_2.0_gr_0.9_hs_200_is_0_l2_0.0_lr_0.0001_nbiter_200000_ni_4_nu_0.1_pf_0.0_rew_1.0_rule_hebb_type_modul_wp_0.0_rngseed_11' suffix = 'SRB_addpw_2_alg_A3C_bent_0.1_blossv_0.1_bs_30_bv_0.1_clamp_0_cs_20_da_tanh_eplen_120_eps_1e-06_fm_1_gc_2.0_gr_0.9_hs_200_is_0_l2_0.0_lr_0.0001_nbiter_200000_ni_4_nu_0.1_pe_500_pf_0.0_rew_1.0_rule_hebb_type_modplast_wp_0.0_rngseed_'+str(numrun) print("Starting training...") params = {} #params.update(defaultParams) params.update(paramdict) with open('./tmp/params_'+suffix+'.dat', 'rb') as fo: params = pickle.load(fo) params['nbiter'] = 1 params['bs'] = 1 print("Used params: ", params) print(platform.uname()) #params['nbsteps'] = params['nbshots'] * ((params['prestime'] + params['interpresdelay']) * params['nbclasses']) + params['prestimetest'] # Total number of steps per episode #NBINPUTBITS = params['ni'] + 1 NBINPUTBITS = params['cs'] + 1 # The additional bit is for the response cue (i.e. the "Go" cue) params['outputsize'] = 2 # "response" and "no response" params['inputsize'] = NBINPUTBITS + params['outputsize'] + ADDINPUT # The total number of input bits is the size of inputs, plus the "response cue" input, plus the number of actions, plus the number of additional inputs # This doesn't work with our version of PyTorch #params['device'] = 'gpu' #device = torch.device("cuda:0" if self.params['device'] == 'gpu' else "cpu") BS = params['bs'] # Initialize random seeds (first two redundant?) 
print("Setting random seeds") np.random.seed(params['rngseed']); random.seed(params['rngseed']); torch.manual_seed(params['rngseed']) #print(click.get_current_context().params) print("Initializing network") if params['type'] == 'modul': net = modul.RetroModulRNN(params) elif params['type'] == 'modplast': net = modul.SimpleModulRNN(params) elif params['type'] == 'plastic': net = modul.PlasticRNN(params) elif params['type'] == 'rnn': net = modul.NonPlasticRNN(params) else: raise ValueError("Network type unknown or not yet implemented: "+params['type']) net.load_state_dict(torch.load('./tmp/torchmodel_'+suffix+'.dat')) print ("Shape of all optimized parameters:", [x.size() for x in net.parameters()]) allsizes = [torch.numel(x.data.cpu()) for x in net.parameters()] print ("Size (numel) of all optimized elements:", allsizes) print ("Total size (numel) of all optimized elements:", sum(allsizes)) #total_loss = 0.0 print("Initializing optimizer") #optimizer = torch.optim.Adam(net.parameters(), lr=1.0*params['lr'], eps=params['eps'], weight_decay=params['l2']) all_losses = [] all_grad_norms = [] all_losses_objective = [] all_total_rewards = [] all_losses_v = [] lossbetweensaves = 0 nowtime = time.time() #meanreward = np.zeros((LABSIZE, LABSIZE)) meanreward = np.zeros(params['ni']) meanrewardT = np.zeros((params['ni'], params['eplen'])) nbtrials = [0]*BS totalnbtrials = 0 nbtrialswithcc = 0 print("Starting episodes!") for numepisode in range(params['nbiter']): PRINTTRACE = 1 #if (numepisode+1) % (params['pe']) == 0: # PRINTTRACE = 1 #optimizer.zero_grad() loss = 0 lossv = 0 hidden = net.initialZeroState() if params['type'] != 'rnn': hebb = net.initialZeroHebb() if params['type'] == 'modul': et = net.initialZeroHebb() # Eligibility Trace is identical to Hebbian Trace in shape pw = net.initialZeroPlasticWeights() numactionchosen = 0 # Generate the cues. Make sure they're all different (important when using very small cues for debugging, e.g. 
cs=2, ni=2) cuedata=[] for nb in range(BS): cuedata.append([]) for ncue in range(params['ni']): assert len(cuedata[nb]) == ncue foundsame = 1 cpt = 0 while foundsame > 0 : cpt += 1 if cpt > 10000: # This should only occur with very weird parameters, e.g. cs=2, ni>4 raise ValueError("Could not generate a full list of different cues") foundsame = 0 candidate = np.random.randint(2, size=params['cs']) * 2 - 1 for backtrace in range(ncue): if np.array_equal(cuedata[nb][backtrace], candidate): foundsame = 1 cuedata[nb].append(candidate) reward = np.zeros(BS) sumreward = np.zeros(BS) rewards = [] vs = [] logprobs = [] cues=[] for nb in range(BS): cues.append([]) dist = 0 numactionschosen = np.zeros(BS, dtype='int32') #reward = 0.0 #rewards = [] #vs = [] #logprobs = [] #sumreward = 0.0 nbtrials = np.zeros(BS) nbrewardabletrials = np.zeros(BS) thistrialhascorrectcue = np.zeros(BS) triallength = np.zeros(BS, dtype='int32') correctcue = np.random.randint(params['ni'], size=BS) trialstep = np.zeros(BS, dtype='int32') modulator0 = [] cuesshown0 = [] rewardsprevstep0 = [] #print("EPISODE ", numepisode) for numstep in range(params['eplen']): #if params['clamp'] == 0: inputs = np.zeros((BS, params['inputsize']), dtype='float32') #else: # inputs = np.zeros((1, params['hs']), dtype='float32') for nb in range(BS): if trialstep[nb] == 0: thistrialhascorrectcue[nb] = 0 # Trial length is randomly modulated for each trial; first time step always -1 (i.e. no input cue), last time step always response-cue (i.e. NBINPUTBITS-1). 
#triallength = params['ni'] // 2 + 3 + np.random.randint(1 + params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps triallength[nb] = params['ni'] // 2 + 3 + np.random.randint(params['ni']) # 3 fixed-cue time steps (1st, last and next-to-last) + some random nb of no-cue time steps # In any trial, we only show half the cues (randomly chosen), once each: mycues = [x for x in range(params['ni'])] random.shuffle(mycues); mycues = mycues[:len(mycues) // 2] # The rest is filled with no-input time steps (i.e. cue = -1), but also with the 3 fixed-cue steps (1st, last, next-to-last) for nc in range(triallength[nb] - 3 - len(mycues)): mycues.append(-1) random.shuffle(mycues) mycues.insert(0, -1); mycues.append(params['ni']); mycues.append(-1) # The first and last time step have no input (cue -1), the next-to-last has the response cue. assert(len(mycues) == triallength[nb]) cues[nb] = mycues inputs[nb, :NBINPUTBITS] = 0 if cues[nb][trialstep[nb]] > -1 and cues[nb][trialstep[nb]] < params['ni']: #inputs[0, cues[trialstep]] = 1.0 inputs[nb, :NBINPUTBITS-1] = cuedata[nb][cues[nb][trialstep[nb]]][:] if cues[nb][trialstep[nb]] == correctcue[nb]: thistrialhascorrectcue[nb] = 1 if cues[nb][trialstep[nb]] == params['ni']: inputs[nb, NBINPUTBITS-1] = 1 # "Go" cue inputs[nb, NBINPUTBITS + 0] = 1.0 # Bias neuron, probably not necessary inputs[nb,NBINPUTBITS + 1] = numstep / params['eplen'] inputs[nb, NBINPUTBITS + 2] = 1.0 * reward[nb] # Reward from previous time step if numstep > 0: inputs[nb, NBINPUTBITS + ADDINPUT + numactionschosen[nb]] = 1 # Previously chosen action inputsC = torch.from_numpy(inputs).cuda() # Might be better: #if rposr == posr and rposc = posc: # inputs[0][-4] = 100.0 #else: # inputs[0][-4] = 0 # Running the network ## Running the network if params['type'] == 'modplast': y, v, DAout, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas elif params['type'] 
== 'modul': y, v, DAout, hidden, hebb, et, pw = net(Variable(inputsC, requires_grad=False), hidden, hebb, et, pw) # y should output raw scores, not probas elif params['type'] == 'plastic': y, v, hidden, hebb = net(Variable(inputsC, requires_grad=False), hidden, hebb) # y should output raw scores, not probas elif params['type'] == 'rnn': y, v, hidden = net(Variable(inputsC, requires_grad=False), hidden) # y should output raw scores, not probas else: raise ValueError("Network type unknown or not yet implemented!") y = F.softmax(y, dim=1) # Must convert y to probas to use this ! distrib = torch.distributions.Categorical(y) actionschosen = distrib.sample() logprobs.append(distrib.log_prob(actionschosen)) numactionschosen = actionschosen.data.cpu().numpy() # Turn to scalar if PRINTTRACE: print("Step ", numstep, " Inputs (1st in batch): ", inputs[0,:params['inputsize']], " - Outputs(0): ", y.data.cpu().numpy()[0,:], " - action chosen(0): ", numactionschosen[0], "TrialLen(0):", triallength[0], "trialstep(0):", trialstep[0], "TTHCC(0): ", thistrialhascorrectcue[0], " -Reward (previous step): ", reward[0], ", cues(0):", cues[0], ", cc(0):", correctcue[0]) #print("Step ", numstep, " Inputs: ", inputs[0,:params['inputsize']], " - Outputs: ", y.data.cpu().numpy(), " - action chosen: ", numactionchosen, # " - mean abs pw: ", np.mean(np.abs(pw.data.cpu().numpy())), "TrialLen:", triallength, "trialstep:", trialstep, "TTHCC: ", thistrialhascorrectcue, " -Reward (previous step): ", reward, ", cues:", cues, ", cc:", correctcue) cuesshown0.append(cues[0][trialstep[0]]) rewardsprevstep0.append(float(reward[0])) modulator0.append(float(DAout[0])) reward = np.zeros(BS, dtype='float32') for nb in range(BS): if numactionschosen[nb] == 1: # Small penalty for any non-rest action taken reward[nb] -= params['wp'] ### DEBUGGING ## Easiest possible episode-dependent response (i.e. the easiest ## possible problem that actually require meta-learning, with ni=2) ## This one works pretty wel... 
But harder ones don't work well! #if numactionchosen == correctcue : # reward = params['rew'] #else: # reward = -params['rew'] trialstep[nb] += 1 if trialstep[nb] == triallength[nb] - 1: # This was the next-to-last step of the trial (and we showed the response signal, unless it was the first few steps in episode). assert(cues[nb][trialstep[nb] - 1] == params['ni'] or numstep < 2) # We must deliver reward (which will be perceived by the agent at the next step), positive or negative, depending on response if thistrialhascorrectcue[nb] and numactionschosen[nb] == 1: reward[nb] += params['rew'] elif (not thistrialhascorrectcue[nb]) and numactionschosen[nb] == 0: reward[nb] += params['rew'] else: reward[nb] -= params['rew'] if np.random.rand() < params['pf']: reward[nb] = -reward[nb] if trialstep[nb] == triallength[nb]: # This was the last step of the trial (and we showed no input) assert(cues[nb][trialstep[nb] - 1] == -1 or numstep < 2) nbtrials[nb] += 1 totalnbtrials += 1 if thistrialhascorrectcue[nb]: nbtrialswithcc += 1 #nbrewardabletrials += 1 # Trial is dead, long live trial trialstep[nb] = 0 # We initialize the hidden state between trials! #if params['is'] == 1: # hidden = net.initialZeroState() rewards.append(reward) vs.append(v) sumreward += reward #if params['alg'] in ['A3C' , 'REIE' , 'REIT']: loss += (params['bent'] * y.pow(2).sum() / BS ) # We want to penalize concentration, i.e. encourage diversity; our version of PyTorch does not have an entropy() function for Distribution, so we use this instead. 
##if PRINTTRACE: ## print("Probabilities:", y.data.cpu().numpy(), "Picked action:", numactionchosen, ", got reward", reward) R = Variable(torch.zeros(BS).cuda(), requires_grad=False) gammaR = params['gr'] for numstepb in reversed(range(params['eplen'])) : R = gammaR * R + Variable(torch.from_numpy(rewards[numstepb]).cuda(), requires_grad=False) ctrR = R - vs[numstepb][0] lossv += ctrR.pow(2).sum() / BS loss -= (logprobs[numstepb] * ctrR.detach()).sum() / BS # Need to check if detach() is OK #pdb.set_trace() # Episode is done, now let's do the actual computations #gammaR = params['gr'] #if params['alg'] == 'A3C': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # lossv += (vs[numstepb][0] - R).pow(2) # loss -= logprobs[numstepb] * (R - vs[numstepb].data[0][0]) # Not sure if the "data" is needed... put it b/c of worry about weird gradient flows # loss += params['bv'] * lossv #elif params['alg'] in ['REI', 'REIE']: # R = sumreward # baseline = meanreward[correctcue] # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * (R - baseline) #elif params['alg'] == 'REIT': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * (R - meanrewardT[correctcue, numstepb]) #else: # raise ValueError("Must select algo type") #elif params['alg'] == 'REINOB': # R = sumreward # for numstepb in reversed(range(params['eplen'])) : # loss -= logprobs[numstepb] * R #elif params['alg'] == 'REITMP': # R = 0 # for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + rewards[numstepb] # loss -= logprobs[numstepb] * R #else: # raise ValueError("Which algo?") #meanreward[correctcue] = (1.0 - params['nu']) * meanreward[correctcue] + params['nu'] * sumreward ##meanreward[rposr, rposc] = (1.0 - params['nu']) * meanreward[rposr, rposc] + params['nu'] * sumreward #R = 0 #for numstepb in reversed(range(params['eplen'])) : # R = gammaR * R + 
rewards[numstepb] # meanrewardT[correctcue, numstepb] = (1.0 - params['nu']) * meanrewardT[correctcue, numstepb] + params['nu'] * R loss += params['blossv'] * lossv loss /= params['eplen'] if PRINTTRACE: #if params['alg'] == 'A3C': print("lossv: ", float(lossv)) #elif params['alg'] in ['REI', 'REIE', 'REIT']: # print("meanreward baselines: ", [meanreward[x] for x in range(params['ni'])]) print ("Total reward for this episode(0):", sumreward[0], "Prop. of trials w/ rewarded cue:", (nbtrialswithcc / totalnbtrials)) #print("Nb trials for this episode(0):", nbtrials[0], "[2]:",nbtrials[2]," Total Nb of trials:", totalnbtrials) #if params['squash'] == 1: # if sumreward < 0: # sumreward = -np.sqrt(-sumreward) # else: # sumreward = np.sqrt(sumreward) #elif params['squash'] == 0: # pass #else: # raise ValueError("Incorrect value for squash parameter") #loss *= sumreward #loss.backward() #all_grad_norms.append(torch.nn.utils.clip_grad_norm(net.parameters(), params['gc'])) #if numepisode > 100: # Burn-in period for meanreward # optimizer.step() #print(sumreward) lossnum = float(loss) lossbetweensaves += lossnum all_losses_objective.append(lossnum) all_total_rewards.append(sumreward.mean()) #all_total_rewards.append(sumreward[0]) #all_losses_v.append(lossv.data[0]) #total_loss += lossnum if (numepisode+1) % params['pe'] == 0: print(numepisode, "====") print("Mean loss: ", lossbetweensaves / params['pe']) lossbetweensaves = 0 print("Mean reward: ", np.sum(all_total_rewards[-params['pe']:])/ params['pe']) previoustime = nowtime nowtime = time.time() print("Time spent on last", params['pe'], "iters: ", nowtime - previoustime) if params['type'] == 'plastic' or params['type'] == 'lstmplastic': print("ETA: ", float(net.eta), "alpha[0,1]: ", net.alpha.data.cpu().numpy()[0,1], "w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) elif params['type'] == 'modul' or params['type'] == 'modul2': print("ETA: ", net.eta.data.cpu().numpy(), " etaet: ", net.etaet.data.cpu().numpy(), " mean-abs pw: ", 
np.mean(np.abs(pw.data.cpu().numpy()))) elif params['type'] == 'rnn': print("w[0,1]: ", net.w.data.cpu().numpy()[0,1] ) if (numepisode+1) % params['save_every'] == 0: print("Saving files...") # lossbetweensaves /= params['save_every'] # print("Average loss over the last", params['save_every'], "episodes:", lossbetweensaves) # print("Alternative computation (should be equal):", np.mean(all_losses_objective[-params['save_every']:])) losslast100 = np.mean(all_losses_objective[-100:]) print("Average loss over the last 100 episodes:", losslast100) # # Instability detection; necessary for SELUs, which seem to be divergence-prone # # Note that if we are unlucky enough to have diverged within the last 100 timesteps, this may not save us. # if losslast100 > 2 * lossbetweensavesprev: # print("We have diverged ! Restoring last savepoint!") # net.load_state_dict(torch.load('./torchmodel_'+suffix + '.txt')) # else: print("NOT saving files!") # lossbetweensavesprev = lossbetweensaves # lossbetweensaves = 0 # sys.stdout.flush() # sys.stderr.flush() modulatorall.append(modulator0) cuesshownall.append(cuesshown0) rewardsprevstepall.append(rewardsprevstep0) np.save('cueshown0.dat', np.array(cuesshownall)) np.save('modulator0.dat', np.array(modulatorall)) np.save('rewardsprevstep0.dat', np.array(rewardsprevstepall)) if __name__ == "__main__": #defaultParams = { # 'type' : 'lstm', # 'seqlen' : 200, # 'hs': 500, # 'activ': 'tanh', # 'steplr': 10e9, # By default, no change in the learning rate # 'gamma': .5, # The annealing factor of learning rate decay for Adam # 'imagesize': 31, # 'nbiter': 30000, # 'lr': 1e-4, # 'test_every': 10, # 'save_every': 3000, # 'rngseed':0 #} parser = argparse.ArgumentParser() parser.add_argument("--rngseed", type=int, help="random seed", default=0) #parser.add_argument("--clamp", type=float, help="maximum (absolute value) gradient for clamping", default=1000000.0) #parser.add_argument("--wp", type=float, help="wall penalty (reward decrement for hitting a 
wall)", default=0.1) parser.add_argument("--rew", type=float, help="reward value (reward increment for taking correct action after correct stimulus)", default=1.0) parser.add_argument("--wp", type=float, help="penalty for hitting walls", default=.0) #parser.add_argument("--pen", type=float, help="penalty value (reward decrement for taking any non-rest action)", default=.2) #parser.add_argument("--exprew", type=float, help="reward value (reward increment for hitting reward location)", default=.0) parser.add_argument("--bent", type=float, help="coefficient for the entropy reward (really Simpson index concentration measure)", default=0.03) parser.add_argument("--blossv", type=float, help="coefficient for value prediction loss", default=.1) #parser.add_argument("--probarev", type=float, help="probability of reversal (random change) in desired stimulus-response, per time step", default=0.0) parser.add_argument("--bv", type=float, help="coefficient for value prediction loss", default=.1) #parser.add_argument("--lsize", type=int, help="size of the labyrinth; must be odd", default=7) #parser.add_argument("--randstart", type=int, help="when hitting reward, should we teleport to random location (1) or center (0)?", default=0) #parser.add_argument("--rp", type=int, help="whether the reward should be on the periphery", default=0) #parser.add_argument("--squash", type=int, help="squash reward through signed sqrt (1 or 0)", default=0) #parser.add_argument("--nbarms", type=int, help="number of arms", default=2) #parser.add_argument("--nbseq", type=int, help="number of sequences between reinitializations of hidden/Hebbian state and position", default=3) #parser.add_argument("--activ", help="activ function ('tanh' or 'selu')", default='tanh') parser.add_argument("--alg", help="meta-learning algorithm (A3C or REI or REIE or REIT)", default='REIT') parser.add_argument("--rule", help="learning rule ('hebb' or 'oja')", default='hebb') parser.add_argument("--type", help="network type 
('lstm' or 'rnn' or 'plastic')", default='modul') #parser.add_argument("--msize", type=int, help="size of the maze; must be odd", default=9) parser.add_argument("--da", help="transformation function of DA signal (tanh or sig or lin)", default='tanh') parser.add_argument("--gr", type=float, help="gammaR: discounting factor for rewards", default=.9) parser.add_argument("--lr", type=float, help="learning rate (Adam optimizer)", default=1e-4) parser.add_argument("--fm", type=int, help="if using neuromodulation, do we modulate the whole network (1) or just half (0) ?", default=1) #parser.add_argument("--na", type=int, help="number of actions (excluding \"rest\" action)", default=2) parser.add_argument("--ni", type=int, help="number of different inputs", default=2) parser.add_argument("--nu", type=float, help="REINFORCE baseline time constant", default=.1) #parser.add_argument("--samestep", type=int, help="compare stimulus and response in the same step (1) or from successive steps (0) ?", default=0) #parser.add_argument("--nbin", type=int, help="number of possible inputs stimulis", default=4) #parser.add_argument("--modhalf", type=int, help="which half of the recurrent netowkr receives modulation (1 or 2)", default=1) #parser.add_argument("--nbac", type=int, help="number of possible non-rest actions", default=4) #parser.add_argument("--rsp", type=int, help="does the agent start each episode from random position (1) or center (0) ?", default=1) parser.add_argument("--addpw", type=int, help="are plastic weights purely additive (1) or forgetting (0) ?", default=2) parser.add_argument("--clamp", type=int, help="inputs clamped (1), fully clamped (2) or through linear layer (0) ?", default=0) parser.add_argument("--eplen", type=int, help="length of episodes", default=100) #parser.add_argument("--exptime", type=int, help="exploration (no reward) time (must be < eplen)", default=0) parser.add_argument("--hs", type=int, help="size of the recurrent (hidden) layer", default=100) 
parser.add_argument("--is", type=int, help="do we initialize hidden state after each trial (1) or not (0) ?", default=0) parser.add_argument("--cs", type=int, help="cue size - number of bits for each cue", default=10) parser.add_argument("--pf", type=float, help="probability of flipping the reward (.5 = pure noise)", default=0) parser.add_argument("--l2", type=float, help="coefficient of L2 norm (weight decay)", default=1e-5) parser.add_argument("--bs", type=int, help="batch size", default=1) parser.add_argument("--gc", type=float, help="gradient clipping", default=1000.0) parser.add_argument("--eps", type=float, help="epsilon for Adam optimizer", default=1e-6) #parser.add_argument("--steplr", type=int, help="duration of each step in the learning rate annealing schedule", default=100000000) #parser.add_argument("--gamma", type=float, help="learning rate annealing factor", default=0.3) parser.add_argument("--nbiter", type=int, help="number of learning cycles", default=1000000) parser.add_argument("--save_every", type=int, help="number of cycles between successive save points", default=200) parser.add_argument("--pe", type=int, help="'print every', number of cycles between successive printing of information", default=100) #parser.add_argument("--", type=int, help="", default=1e-4) args = parser.parse_args(); argvars = vars(args); argdict = { k : argvars[k] for k in argvars if argvars[k] != None } #train() train(argdict)