Repository: danijar/layered Branch: master Commit: c1c09d95f900 Files: 52 Total size: 74.2 KB Directory structure: gitextract_govqiphk/ ├── .gitignore ├── .travis.yml ├── LICENSE.md ├── README.md ├── dataset/ │ └── .gitignore ├── doc/ │ ├── conf.py │ ├── index.rst │ ├── layered.activation.rst │ ├── layered.cost.rst │ ├── layered.dataset.rst │ ├── layered.evaluation.rst │ ├── layered.example.rst │ ├── layered.gradient.rst │ ├── layered.network.rst │ ├── layered.optimization.rst │ ├── layered.plot.rst │ ├── layered.problem.rst │ ├── layered.trainer.rst │ └── layered.utility.rst ├── layered/ │ ├── __init__.py │ ├── __main__.py │ ├── activation.py │ ├── cost.py │ ├── dataset.py │ ├── evaluation.py │ ├── example.py │ ├── gradient.py │ ├── network.py │ ├── optimization.py │ ├── plot.py │ ├── problem.py │ ├── trainer.py │ └── utility.py ├── problem/ │ ├── mnist-relu-batch.yaml │ ├── mnist-relu-online.yaml │ ├── modulo.yaml │ ├── sparse-field-batch.yaml │ ├── sparse-field-online.yaml │ ├── sparse-max.yaml │ └── tying.yaml ├── pylintrc ├── setup.py └── test/ ├── __init__.py ├── fixtures.py ├── test_example.py ├── test_gradient.py ├── test_network.py ├── test_optimization.py ├── test_plot.py ├── test_problem.py ├── test_trainer.py └── test_utility.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Linux *.swp *.swo *.swn # Virtual environment /[Ii]nclude /[Ll]ib /Scripts /bin /local /man /share # Python __pycache__/ *.py[cod] # Pip pip-selfcheck.json *.whl *.egg *.egg-info # Pytest .cache .coverage .coverage* # Setuptools /build /dist *.eggs # Sphinx doc/_build ================================================ FILE: .travis.yml ================================================ language: python python: - "3.5" - "3.4" - "3.3" install: - pip install coveralls script: - python setup.py test - python 
setup.py install - python setup.py build_sphinx after_success: - coveralls ================================================ FILE: LICENSE.md ================================================ The MIT License (MIT) Copyright (c) 2015 Danijar Hafner Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ [![Build Status][1]][2] [![Code Climate][3]][4] [![Documentation][5]][6] [1]: https://travis-ci.org/danijar/layered.svg?branch=master [2]: https://travis-ci.org/danijar/layered [3]: https://codeclimate.com/github/danijar/layered/badges/gpa.svg [4]: https://codeclimate.com/github/danijar/layered [5]: https://readthedocs.org/projects/pip/badge/ [6]: https://layered.readthedocs.org/en/latest/ Layered ======= This project aims to be a clean and modular implementation of feed forward neural networks. It's written in Python 3 and published under the MIT license. 
I started this project in order to understand the concepts of deep learning. You can use this repository as guidance if you want to implement neural networks, which I highly recommend if you are interested in understanding them.

Instructions
------------

This will train a network with 1.3M weights to classify handwritten digits and visualize the progress. After a couple of minutes, the error should drop below 3%. To install globally, just skip the first command. Solutions to all reported problems can be found in the troubleshooting section.

```bash
virtualenv . -p python3 --system-site-packages && source bin/activate
pip3 install layered
curl -o mnist.yaml -L http://git.io/vr7y1
layered mnist.yaml -v
```

### Problem Definition

Learning problems are defined in YAML files and it's easy to create your own. An overview of available cost and activation functions is available a few sections below.

```yaml
dataset: Mnist
cost: CrossEntropy
layers:
- activation: Identity
  size: 784
- activation: Relu
  size: 700
- activation: Relu
  size: 700
- activation: Relu
  size: 400
- activation: Softmax
  size: 10
epochs: 5
batch_size: 32
learning_rate: 0.01
momentum: 0.9
weight_scale: 0.01
weight_decay: 0
evaluate_every: 5000
```

### Command Line Arguments

```
layered [-h] [-v] [-l weights.npy] [-s weights.npy] problem.yaml
```

| Short | Long | Description |
| :---- | :--- | :---------- |
| `-h` | `--help` | Print usage instructions |
| `-v` | `--visual` | Show a diagram of training costs and testing error |
| `-l` | `--load` | Path to load learned weights from at startup |
| `-s` | `--save` | Path to dump the learned weights at each evaluation |

### Contribution

Optionally, create a virtual environment. Then install the dependencies. The last command is to see if everything works.

```bash
git clone https://github.com/danijar/layered.git && cd layered
virtualenv . -p python3 --system-site-packages && source bin/activate
pip3 install -e .
python3 -m layered problem/modulo.yaml -v ``` Now you can start playing around with the code. For pull requests, please squash the changes to a single commit and ensure that the linters and tests are passing. ```bash python setup.py test ``` If you have questions, feel free to contact me. Advanced Guide -------------- In this guide you will learn how to create and train models manually rather than using the problem definitions to gain more insight into training neural networks. Let's start! ### Step 1: Network Definition A network is defined by its layers. The parameters for a layer are the amount of neurons and the activation function. The first layer has the identity function since we don't want to already modify the input data before feeding it in. ```python from layered.network import Network from layered.activation import Identity, Relu, Softmax num_inputs = 784 num_outputs = 10 network = Network([ Layer(num_inputs, Identity), Layer(700, Relu), Layer(500, Relu), Layer(300, Relu), Layer(num_outputs, Softmax), ]) ``` ### Step 2: Activation Functions | Function | Description | Definition | __________Graph__________ | | -------- | ----------- | :--------: | ------------------------- | | Identity | Don't transform the incoming data. That's what you would expect at input layers. | x | ![Identity](image/identity.png) | | Relu | Fast non-linear function that has proven to be effective in deep networks. | max(0, x) | ![Relu](image/relu.png) | | Sigmoid | The de facto standard activation before Relu came up. Smoothly maps the incoming activation into a range from zero to one. | 1 / (1 + exp(-x)) | ![Sigmoid](image/sigmoid.png) | | Softmax | Smooth activation function where the outgoing activations sum up to one. It's commonly used for output layers in classification because the outgoing activations can be interpreted as probabilities. 
| exp(x) / sum(exp(x)) | ![Softmax](image/softmax.png) | ### Step 3: Weight Initialization The weight matrices of the network are handed to algorithms like backpropagation, gradient descent and weight decay. If the initial weights of a neural network would be zero, no activation would be passed to the deeper layers. So we start with random values sampled from a normal distribution. ```python from layered.network import Matrices weights = Matrices(network.shapes) weights.flat = np.random.normal(0, weight_scale, len(weights.flat)) ``` ### Step 4: Optimization Algorithm Now let's learn good weights with standard backpropagation and gradient descent. The classes for this can be imported from the `gradient` and `optimization` modules. We also need a cost function. ```python from layered.cost import SquaredError from layered.gradient import Backprop from layered.optimization import GradientDecent backprop = Backprop(network, cost=SquaredError()) descent = GradientDecent() ``` ### Step 5: Cost Functions | Function | Description | Definition | __________Graph__________ | | -------- | ----------- | :--------: | ------------------------- | | SquaredError | The most common cost function. The difference is squared to always be positive and penalize large errors stronger. | (pred - target) ^ 2 / 2 | ![Squared Error](image/squared-error.png) | | CrossEntropy | Logistic cost function useful for classification tasks. Commonly used in conjunction with Softmax output layers. | -((target * log(pred)) + (1 - target) * log(1 - pred)) | ![Cross Entropy](image/cross-entropy.png) | ### Step 6: Dataset and Training Datasets are automatically downloaded and cached. We just iterate over the training examples and train the weights on them. 
```python
from layered.dataset import Mnist

dataset = Mnist()
for example in dataset.training:
    gradient = backprop(weights, example)
    weights = descent(weights, gradient, learning_rate=0.1)
```

### Step 7: Evaluation

Finally, we want to see what our network has learned. We do this by letting the network predict classes for the testing examples. The strongest class is the model's best bet, thus the `np.argmax`.

```python
import numpy as np

error = 0
for example in dataset.testing:
    prediction = network.feed(weights, example.data)
    if np.argmax(prediction) != np.argmax(example.target):
        error += 1 / len(dataset.testing)
print('Testing error', round(100 * error, 2), '%')
```

Troubleshooting
---------------

### Failed building wheel

You can safely ignore these messages during installation.

### Python is not installed as a framework

If you get this error on Mac, don't create a virtualenv and install layered globally with `sudo pip3 install layered`.

### Crash at startup

Install or reinstall `python3-matplotlib` or equivalent using your package manager. Check if matplotlib works outside of the virtualenv.

```python
import matplotlib.pyplot as plt
plt.plot([1, 2, 3, 4])
plt.show()
```

Ensure you create your virtualenv with `--system-site-packages`.

### Did you encounter another problem?

Please [open an issue][10].
[10]: https://github.com/danijar/layered/issues ================================================ FILE: dataset/.gitignore ================================================ * !.gitignore ================================================ FILE: doc/conf.py ================================================ #!/usr/bin/env python3 import sys import os from unittest.mock import MagicMock sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', ] MOCK_MODULES = [ 'yaml', 'numpy', 'matplotlib', 'matplotlib.pyplot', ] for mod_name in MOCK_MODULES: sys.modules[mod_name] = MagicMock() ################################################################ # General ################################################################ project = 'Layered' copyright = '2015, Danijar Hafner' author = 'Danijar Hafner' version = '0.1' release = '0.1.4' source_suffix = '.rst' master_doc = 'index' templates_path = ['_templates'] exclude_patterns = ['_build'] pygments_style = 'sphinx' add_module_names = False todo_include_todos = False language = None htmlhelp_basename = 'Layereddoc' ################################################################ # HTML ################################################################ html_domain_indices = False html_use_index = False html_show_sphinx = False html_show_copyright = False ################################################################ # Autodoc ################################################################ autoclass_content = 'class' autodoc_member_order = 'bysource' autodoc_default_flags = [ 'members', 'undoc-members', 'inherited-members', 'show-inheritance', ] autodoc_mock_imports = MOCK_MODULES def autodoc_skip_member(app, what, name, obj, skip, options): keep = ['call', 'iter', 'getitem', 'setitem'] if name.strip('_') in keep: return False return skip def setup(app): app.connect("autodoc-skip-member", autodoc_skip_member) 
================================================ FILE: doc/index.rst ================================================ Layered Documentation ===================== .. toctree:: layered.activation layered.cost layered.dataset layered.evaluation layered.example layered.gradient layered.network layered.optimization layered.plot layered.problem layered.trainer layered.utility ================================================ FILE: doc/layered.activation.rst ================================================ layered.activation module ========================= .. automodule:: layered.activation :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.cost.rst ================================================ layered.cost module =================== .. automodule:: layered.cost :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.dataset.rst ================================================ layered.dataset module ====================== .. automodule:: layered.dataset :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.evaluation.rst ================================================ layered.evaluation module ========================= .. automodule:: layered.evaluation :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.example.rst ================================================ layered.example module ====================== .. automodule:: layered.example :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.gradient.rst ================================================ layered.gradient module ======================= .. 
automodule:: layered.gradient :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.network.rst ================================================ layered.network module ====================== .. automodule:: layered.network :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.optimization.rst ================================================ layered.optimization module =========================== .. automodule:: layered.optimization :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.plot.rst ================================================ layered.plot module =================== .. automodule:: layered.plot :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.problem.rst ================================================ layered.problem module ====================== .. automodule:: layered.problem :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.trainer.rst ================================================ layered.trainer module ====================== .. automodule:: layered.trainer :members: :undoc-members: :show-inheritance: ================================================ FILE: doc/layered.utility.rst ================================================ layered.utility module ====================== .. 
automodule:: layered.utility :members: :undoc-members: :show-inheritance:
================================================ FILE: layered/__init__.py ================================================
================================================ FILE: layered/__main__.py ================================================
import os
import argparse

from layered.problem import Problem
from layered.trainer import Trainer


def main():
    # Command line entry point: parse arguments, load the YAML problem
    # definition and hand everything over to the trainer.
    parser = argparse.ArgumentParser('layered')
    parser.add_argument(
        'problem', help='path to the YAML problem definition')
    parser.add_argument(
        '-v', '--visual', action='store_true',
        help='show a diagram of training costs')
    parser.add_argument(
        '-l', '--load', default=None,
        help='path to load the weights from at startup')
    parser.add_argument(
        '-s', '--save', default=None,
        help='path to dump the learned weights at each evaluation')
    parser.add_argument(
        '-c', '--check', action='store_true',
        help='whether to activate gradient checking')
    args = parser.parse_args()
    print('Problem', os.path.split(args.problem)[1])
    problem = Problem(args.problem)
    trainer = Trainer(
        problem, args.load, args.save, args.visual, args.check)
    trainer()


if __name__ == '__main__':
    main()
================================================ FILE: layered/activation.py ================================================
import numpy as np


class Activation:
    # Interface for activation functions; subclasses implement the forward
    # pass in __call__ and the backward pass in delta.

    def __call__(self, incoming):
        raise NotImplementedError

    def delta(self, incoming, outgoing, above):
        """
        Compute the derivative of the cost with respect to the input of this
        activation function. Outgoing is what this function returned in the
        forward pass and above is the derivative of the cost with respect to
        the outgoing activation.
""" raise NotImplementedError class Identity(Activation): def __call__(self, incoming): return incoming def delta(self, incoming, outgoing, above): delta = np.ones(incoming.shape).astype(float) return delta * above class Sigmoid(Activation): def __call__(self, incoming): return 1 / (1 + np.exp(-incoming)) def delta(self, incoming, outgoing, above): delta = outgoing * (1 - outgoing) return delta * above class Relu(Activation): def __call__(self, incoming): return np.maximum(incoming, 0) def delta(self, incoming, outgoing, above): delta = np.greater(incoming, 0).astype(float) return delta * above class Softmax(Activation): def __call__(self, incoming): # The constant doesn't change the expression but prevents overflows. constant = np.max(incoming) exps = np.exp(incoming - constant) return exps / exps.sum() def delta(self, incoming, outgoing, above): delta = outgoing * above sum_ = delta.sum(axis=delta.ndim - 1, keepdims=True) delta -= outgoing * sum_ return delta class SparseField(Activation): def __init__(self, inhibition=0.05, leaking=0.0): self.inhibition = inhibition self.leaking = leaking def __call__(self, incoming): count = len(incoming) length = int(np.sqrt(count)) assert length ** 2 == count, 'layer size must be a square' field = incoming.copy().reshape((length, length)) radius = int(np.sqrt(self.inhibition * count)) // 2 assert radius, 'no inhibition due to small factor' outgoing = np.zeros(field.shape) while True: x, y = np.unravel_index(field.argmax(), field.shape) if field[x, y] <= 0: break outgoing[x, y] = 1 surrounding = np.s_[ max(x - radius, 0):min(x + radius + 1, length), max(y - radius, 0):min(y + radius + 1, length)] field[surrounding] = 0 assert field[x, y] == 0 outgoing = outgoing.reshape(count) outgoing = np.maximum(outgoing, self.leaking * incoming) return outgoing def delta(self, incoming, outgoing, above): delta = np.greater(outgoing, 0).astype(float) return delta * above class SparseRange(Activation): """ E%-Max Winner-Take-All. 
Binary activation. First, the activation function is applied. Then all neurons within the specified range below the strongest neuron are set to one. All others are set to zero. The gradient is the one of the activation function for active neurons and zero otherwise. See: A Second Function of Gamma Frequency Oscillations: An E%-Max Winner-Take-All Mechanism Selects Which Cells Fire. (2009) """ def __init__(self, range_=0.3, function=Sigmoid()): assert 0 < range_ < 1 self._range = range_ self._function = function def __call__(self, incoming): incoming = self._function(incoming) threshold = self._threshold(incoming) active = (incoming >= threshold) outgoing = np.zeros(incoming.shape) outgoing[active] = 1 # width = active.sum() * 80 / 1000 # print('|', '#' * width, ' ' * (80 - width), '|') return outgoing def delta(self, incoming, outgoing, above): # return self._function.delta(incoming, outgoing, outgoing * above) return outgoing * self._function.delta(incoming, outgoing, above) def _threshold(self, incoming): min_, max_ = incoming.min(), incoming.max() threshold = min_ + (max_ - min_) * (1 - self._range) return threshold ================================================ FILE: layered/cost.py ================================================ import numpy as np class Cost: def __call__(self, prediction, target): raise NotImplementedError def delta(self, prediction, target): raise NotImplementedError class SquaredError(Cost): """ Fast and simple cost function. """ def __call__(self, prediction, target): return (prediction - target) ** 2 / 2 def delta(self, prediction, target): return prediction - target class CrossEntropy(Cost): """ Logistic cost function used for classification tasks. Learns faster in the beginning than SquaredError because large errors are penalized exponentially. This makes sense in classification since only the best class will be the predicted one. 
    """

    def __init__(self, epsilon=1e-11):
        # Epsilon keeps log() and the delta denominator away from zero.
        self.epsilon = epsilon

    def __call__(self, prediction, target):
        clipped = np.clip(prediction, self.epsilon, 1 - self.epsilon)
        cost = target * np.log(clipped) + (1 - target) * np.log(1 - clipped)
        return -cost

    def delta(self, prediction, target):
        # NOTE(review): __call__ clips the prediction but delta only clamps
        # the denominator; presumably fine because predictions come from a
        # squashing activation -- confirm.
        denominator = np.maximum(prediction - prediction ** 2, self.epsilon)
        delta = (prediction - target) / denominator
        assert delta.shape == target.shape == prediction.shape
        return delta
================================================ FILE: layered/dataset.py ================================================
import array
import os
import shutil
import struct
import gzip
from urllib.request import urlopen

import numpy as np

from layered.example import Example
from layered.utility import ensure_folder


class Dataset:
    # Base class for datasets: downloads the class-side urls, parses them
    # via the subclass and optionally caches the parsed examples on disk.

    urls = []
    cache = True

    def __init__(self):
        cache = type(self).cache
        if cache and self._is_cached():
            print('Load cached dataset')
            self.load()
        else:
            filenames = [self.download(x) for x in type(self).urls]
            self.training, self.testing = self.parse(*filenames)
            if cache:
                self.dump()

    @classmethod
    def folder(cls):
        # Datasets are cached below ~/.layered/dataset/<classname>.
        name = cls.__name__.lower()
        home = os.path.expanduser('~')
        folder = os.path.join(home, '.layered/dataset', name)
        ensure_folder(folder)
        return folder

    def parse(self):
        """
        Subclass responsibility. The filenames of downloaded files will be
        passed as individual parameters to this function. Therefore, it must
        accept as many parameters as provided class-side urls. Should return
        a tuple of training examples and testing examples.
""" raise NotImplementedError def dump(self): np.save(self._training_path(), self.training) np.save(self._testing_path(), self.testing) def load(self): self.training = np.load(self._training_path()) self.testing = np.load(self._testing_path()) def download(self, url): _, filename = os.path.split(url) filename = os.path.join(self.folder(), filename) print('Download', filename) with urlopen(url) as response, open(filename, 'wb') as file_: shutil.copyfileobj(response, file_) return filename @staticmethod def split(examples, ratio=0.8): """ Utility function that can be used within the parse() implementation of sub classes to split a list of example into two lists for training and testing. """ split = int(ratio * len(examples)) return examples[:split], examples[split:] def _is_cached(self): if not os.path.exists(self._training_path()): return False if not os.path.exists(self._testing_path()): return False return True def _training_path(self): return os.path.join(self.folder(), 'training.npy') def _testing_path(self): return os.path.join(self.folder(), 'testing.npy') class Test(Dataset): cache = False def __init__(self, amount=10): self.amount = amount super().__init__() def parse(self): examples = [Example([1, 2, 3], [1, 2, 3]) for _ in range(self.amount)] return self.split(examples) class Regression(Dataset): """ Synthetically generated dataset for regression. The task is to predict the sum and product of all the input values. All values are normalized between zero and one. 
""" cache = False def __init__(self, amount=10000, inputs=10): self.amount = amount self.inputs = inputs super().__init__() def parse(self): data = np.random.rand(self.amount, self.inputs) products = np.prod(data, axis=1) products = products / np.max(products) sums = np.sum(data, axis=1) sums = sums / np.max(sums) targets = np.column_stack([sums, products]) examples = [Example(x, y) for x, y in zip(data, targets)] return self.split(examples) class Modulo(Dataset): """ Sythetically generated classification dataset. The task is to predict the modulo classes of random integers encoded as bit arrays of length 32. """ cache = False def __init__(self, amount=60000, inputs=32, classes=7): self.amount = amount self.inputs = inputs self.classes = classes super().__init__() def parse(self): data = np.random.randint(0, self.inputs ** 2 - 1, self.amount) mods = np.mod(data, self.classes) targets = np.zeros((self.amount, self.classes)) for index, mod in enumerate(mods): targets[index][mod] = 1 data = (((data[:, None] & (1 << np.arange(self.inputs)))) > 0) examples = [Example(x, y) for x, y in zip(data, targets)] return self.split(examples) class Mnist(Dataset): """ The MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting. 
(from http://yann.lecun.com/exdb/mnist/) """ urls = [ 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', ] def parse(self, train_x, train_y, test_x, test_y): # pylint: disable=arguments-differ training = list(self.read(train_x, train_y)) testing = list(self.read(test_x, test_y)) return training, testing @staticmethod def read(data, labels): images = gzip.open(data, 'rb') _, size, rows, cols = struct.unpack('>IIII', images.read(16)) image_bin = array.array('B', images.read()) images.close() labels = gzip.open(labels, 'rb') _, size2 = struct.unpack('>II', labels.read(8)) assert size == size2 label_bin = array.array('B', labels.read()) labels.close() for i in range(size): data = image_bin[i * rows * cols:(i + 1) * rows * cols] data = np.array(data).reshape(rows * cols) / 255 target = np.zeros(10) target[label_bin[i]] = 1 yield Example(data, target) ================================================ FILE: layered/evaluation.py ================================================ import numpy as np def compute_costs(network, weights, cost, examples): prediction = [network.feed(weights, x.data) for x in examples] costs = [cost(x, y.target).mean() for x, y in zip(prediction, examples)] return costs def compute_error(network, weights, examples): prediction = [network.feed(weights, x.data) for x in examples] error = sum(bool(np.argmax(x) != np.argmax(y.target)) for x, y in zip(prediction, examples)) / len(examples) return error ================================================ FILE: layered/example.py ================================================ import numpy as np class Example: """ Immutable class representing one example in a dataset. 
    """

    __slots__ = ('_data', '_target')

    def __init__(self, data, target):
        self._data = np.array(data, dtype=float)
        self._target = np.array(target, dtype=float)

    @property
    def data(self):
        return self._data

    @property
    def target(self):
        return self._target

    def __getstate__(self):
        # __slots__ classes have no __dict__, so pickling needs explicit
        # state handling.
        return {'data': self.data, 'target': self.target}

    def __setstate__(self, state):
        self._data = state['data']
        self._target = state['target']

    def __repr__(self):
        data = ' '.join(str(round(x, 2)) for x in self.data)
        target = ' '.join(str(round(x, 2)) for x in self.target)
        return '({})->({})'.format(data, target)
================================================ FILE: layered/gradient.py ================================================
import math
import functools
import multiprocessing

import numpy as np

from layered.network import Matrices
from layered.utility import batched


class Gradient:
    # Interface for computing the gradient of the cost with respect to the
    # weights of a network for a single example.

    def __init__(self, network, cost):
        self.network = network
        self.cost = cost

    def __call__(self, weights, example):
        raise NotImplementedError


class Backprop(Gradient):
    """
    Use the backpropagation algorithm to efficiently determine the gradient
    of the cost function with respect to each individual weight.
    """

    def __call__(self, weights, example):
        prediction = self.network.feed(weights, example.data)
        delta_output = self._delta_output(prediction, example.target)
        delta_layers = self._delta_layers(weights, delta_output)
        delta_weights = self._delta_weights(delta_layers)
        return delta_weights

    def _delta_output(self, prediction, target):
        assert len(target) == self.network.layers[-1].size
        # The derivative with respect to the output layer is computed as the
        # product of error derivative and local derivative at the layer.
        delta_cost = self.cost.delta(prediction, target)
        delta_output = self.network.layers[-1].delta(delta_cost)
        assert len(delta_cost) == len(delta_output) == len(target)
        return delta_output

    def _delta_layers(self, weights, delta_output):
        # Propagate backwards through the hidden layers but not the input
        # layer. The current weight matrix is the one to the right of the
        # current layer.
        gradient = [delta_output]
        hidden = list(zip(weights[1:], self.network.layers[1:-1]))
        assert all(x.shape[0] - 1 == len(y) for x, y in hidden)
        for weight, layer in reversed(hidden):
            delta = self._delta_layer(layer, weight, gradient[-1])
            gradient.append(delta)
        # Reversed so the deltas line up with the layers from input to
        # output again.
        return reversed(gradient)

    def _delta_layer(self, layer, weight, above):
        # The gradient at a layer is computed as the derivative of both the
        # local activation and the weighted sum of the derivatives in the
        # deeper layer.
        backward = self.network.backward(weight, above)
        delta = layer.delta(backward)
        assert len(layer) == len(backward) == len(delta)
        return delta

    def _delta_weights(self, delta_layers):
        # The gradient with respect to the weights is computed as the
        # gradient at the target neuron multiplied by the activation of the
        # source neuron.
        gradient = Matrices(self.network.shapes)
        prev_and_delta = zip(self.network.layers[:-1], delta_layers)
        for index, (previous, delta) in enumerate(prev_and_delta):
            # We want to tweak the bias weights so we need them in the
            # gradient.
            activations = np.insert(previous.outgoing, 0, 1)
            assert activations[0] == 1
            gradient[index] = np.outer(activations, delta)
        return gradient


class NumericalGradient(Gradient):
    """
    Approximate the gradient for each weight individually by sampling the
    error function slightly above and below the current value of the weight.
    """

    def __init__(self, network, cost, distance=1e-5):
        super().__init__(network, cost)
        self.distance = distance

    def __call__(self, weights, example):
        """
        Modify each weight individually in both directions to calculate a
        numeric gradient of the weights.
        """
        # We need a copy of the weights that we can modify to evaluate the
        # cost function on.
        modified = Matrices(weights.shapes, weights.flat.copy())
        gradient = Matrices(weights.shapes)
        for i, connection in enumerate(weights):
            for j, original in np.ndenumerate(connection):
                # Sample above and below and compute costs.
                modified[i][j] = original + self.distance
                above = self._evaluate(modified, example)
                modified[i][j] = original - self.distance
                below = self._evaluate(modified, example)
                # Restore the original value so we can reuse the weight
                # matrix for the next iteration.
                modified[i][j] = original
                # Compute the numeric gradient.
                sample = (above - below) / (2 * self.distance)
                gradient[i][j] = sample
        return gradient

    def _evaluate(self, weights, example):
        prediction = self.network.feed(weights, example.data)
        cost = self.cost(prediction, example.target)
        assert cost.shape == prediction.shape
        return cost.sum()


class CheckedBackprop(Gradient):
    """
    Computes the gradient both analytically through backpropagation and
    numerically to validate the backpropagation implementation and
    derivatives of activation functions and cost functions. This is slow by
    its nature and it's recommended to validate derivatives on small
    networks.
    """

    def __init__(self, network, cost, distance=1e-5, tolerance=1e-8):
        self.tolerance = tolerance
        super().__init__(network, cost)
        self.analytic = Backprop(network, cost)
        self.numeric = NumericalGradient(network, cost, distance)

    def __call__(self, weights, example):
        analytic = self.analytic(weights, example)
        numeric = self.numeric(weights, example)
        distances = np.absolute(analytic.flat - numeric.flat)
        # NOTE(review): `worst` is an absolute difference, yet the message
        # below formats it as a percentage -- confirm intent.
        worst = distances.max()
        if worst > self.tolerance:
            print('Gradient differs by {:.2f}%'.format(100 * worst))
        else:
            print('Gradient looks good')
        return analytic


class BatchBackprop:
    """
    Calculate the average gradient over a batch of examples.
""" def __init__(self, network, cost): self.backprop = Backprop(network, cost) def __call__(self, weights, examples): gradient = Matrices(weights.shapes) for example in examples: gradient += self.backprop(weights, example) return gradient / len(examples) class ParallelBackprop: """ Alternative to BatchBackprop that yields the same results but utilizes multiprocessing to make use of more than one processor core. """ def __init__(self, network, cost, workers=4): self.backprop = BatchBackprop(network, cost) self.workers = workers self.pool = multiprocessing.Pool(self.workers) def __call__(self, weights, examples): batch_size = int(math.ceil(len(examples) / self.workers)) batches = list(batched(examples, batch_size)) sizes = [len(x) / batch_size for x in batches] sizes = [x / sum(sizes) for x in sizes] assert len(batches) <= self.workers assert sum(sizes) == 1 compute = functools.partial(self.backprop, weights) gradients = self.pool.map(compute, batches) return sum(x * y for x, y in zip(gradients, sizes)) ================================================ FILE: layered/network.py ================================================ import operator import numpy as np class Layer: def __init__(self, size, activation): assert size and isinstance(size, int) self.size = size self.activation = activation() self.incoming = np.zeros(size) self.outgoing = np.zeros(size) assert len(self.incoming) == len(self.outgoing) == self.size def __len__(self): assert len(self.incoming) == len(self.outgoing) return len(self.incoming) def __repr__(self): return repr(self.outgoing) def __str__(self): table = zip(self.incoming, self.outgoing) rows = [' /'.join('{: >6.3f}'.format(x) for x in row) for row in table] return '\n'.join(rows) def apply(self, incoming): """ Store the incoming activation, apply the activation function and store the result as outgoing activation. 
""" assert len(incoming) == self.size self.incoming = incoming outgoing = self.activation(self.incoming) assert len(outgoing) == self.size self.outgoing = outgoing def delta(self, above): """ The derivative of the activation function at the current state. """ return self.activation.delta(self.incoming, self.outgoing, above) class Matrices: def __init__(self, shapes, elements=None): self.shapes = shapes length = sum(x * y for x, y in shapes) if elements is not None: assert len(elements) == length elements = elements.copy() else: elements = np.zeros(length) self.flat = elements def __len__(self): return len(self.shapes) def __getitem__(self, index): if hasattr(index, '__len__'): assert isinstance(index[0], int) return self[index[0]][index[1:]] if isinstance(index, slice): return [self[i] for i in self._range_from_slice(index)] slice_ = self._locate(index) data = self.flat[slice_] data = data.reshape(self.shapes[index]) return data def __setitem__(self, index, data): if hasattr(index, '__len__'): assert isinstance(index[0], int) self[index[0]][index[1:]] = data return if isinstance(index, slice): for i in self._range_from_slice(index): self[i] = data return slice_ = self._locate(index) data = data.reshape(slice_.stop - slice_.start) self.flat[slice_] = data def __getattr__(self, name): # Tunnel not found properties to the underlying array. flat = super().__getattribute__('flat') return getattr(flat, name) def __setattr_(self, name, value): # Ensure that the size of the underlying array doesn't change. 
if name == 'flat': assert value.shape == self.flat.shape super().__setattr__(name, value) def copy(self): return Matrices(self.shapes, self.flat.copy()) def __add__(self, other): return self._operation(other, lambda x, y: x + y) def __sub__(self, other): return self._operation(other, lambda x, y: x - y) def __mul__(self, other): return self._operation(other, lambda x, y: x * y) def __truediv__(self, other): return self._operation(other, lambda x, y: x / y) __rmul__ = __mul__ __radd__ = __add__ def _operation(self, other, operation): try: other = other.flat except AttributeError: pass return Matrices(self.shapes, operation(self.flat, other)) def _locate(self, index): assert isinstance(index, int), ( 'Only single elemente can be indiced in the first dimension.') if index < 0: index = len(self.shapes) + index if not 0 <= index < len(self.shapes): raise IndexError offset = sum(x * y for x, y in self.shapes[:index]) length = operator.mul(*self.shapes[index]) return slice(offset, offset + length) def _range_from_slice(self, slice_): start = slice_.start if slice_.start else 0 stop = slice_.stop if slice_.stop else len(self.shapes) step = slice_.step if slice_.step else 1 return range(start, stop, step) def __str__(self): return str(len(self.flat)) + str(self.flat) class Network: def __init__(self, layers): self.layers = layers self.sizes = tuple(layer.size for layer in self.layers) # Weight matrices have the dimensions of the two layers around them. # Also, there is an additional bias input to each weight matrix. self.shapes = zip(self.sizes[:-1], self.sizes[1:]) self.shapes = [(x + 1, y) for x, y in self.shapes] # Weight matrices are in between the layers. assert len(self.shapes) == len(self.layers) - 1 def feed(self, weights, data): """ Evaluate the network with alternative weights on the input data and return the output activation. """ assert len(data) == self.layers[0].size self.layers[0].apply(data) # Propagate trough the remaining layers. 
connections = zip(self.layers[:-1], weights, self.layers[1:]) for previous, weight, current in connections: incoming = self.forward(weight, previous.outgoing) current.apply(incoming) # Return the activations of the output layer. return self.layers[-1].outgoing @staticmethod def forward(weight, activations): # Add bias input of one. activations = np.insert(activations, 0, 1) assert activations[0] == 1 right = activations.dot(weight) return right @staticmethod def backward(weight, activations): left = activations.dot(weight.transpose()) # Don't expose the bias input of one. left = left[1:] return left ================================================ FILE: layered/optimization.py ================================================ class GradientDecent: """ Adapt the weights in the opposite direction of the gradient to reduce the error. """ def __call__(self, weights, gradient, learning_rate=0.1): return weights - learning_rate * gradient class Momentum: """ Slow down changes of direction in the gradient by aggregating previous values of the gradient and multiplying them in. """ def __init__(self): self.previous = None def __call__(self, gradient, rate=0.9): gradient = gradient.copy() if self.previous is None: self.previous = gradient.copy() else: assert self.previous.shape == gradient.shape gradient += rate * self.previous self.previous = gradient.copy() return gradient class WeightDecay: """ Slowly moves each weight closer to zero for regularization. This can help the model to find simpler solutions. """ def __call__(self, weights, rate=1e-4): return (1 - rate) * weights class WeightTying: """ Constraint groups of slices of the gradient to have the same value by averaging them. Should be applied to the initial weights and each gradient. 
""" def __init__(self, *groups): for group in groups: assert group and hasattr(group, '__len__') assert all([isinstance(x[0], int) for x in group]) assert all([isinstance(y, (slice, int)) for x in group for y in x]) self.groups = groups def __call__(self, matrices): matrices = matrices.copy() for group in self.groups: slices = [matrices[slice_] for slice_ in group] assert all([x.shape == slices[0].shape for x in slices]), ( 'All slices within a group must have the same shape. ' 'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.') average = sum(slices) / len(slices) assert average.shape == slices[0].shape for slice_ in group: matrices[slice_] = average return matrices ================================================ FILE: layered/plot.py ================================================ # pylint: disable=wrong-import-position import collections import time import warnings import inspect import threading import matplotlib # Don't call the code if Sphinx inspects the file mocking external imports. if inspect.ismodule(matplotlib): # noqa # On Mac force backend that works with threading. if matplotlib.get_backend() == 'MacOSX': matplotlib.use('TkAgg') # Hide matplotlib deprecation message. warnings.filterwarnings('ignore', category=matplotlib.cbook.mplDeprecation) # Ensure available interactive backend. if matplotlib.get_backend() not in matplotlib.rcsetup.interactive_bk: print('No visual backend available. 
Maybe you are inside a virtualenv ' 'that was created without --system-site-packages.') import matplotlib.pyplot as plt class Interface: def __init__(self, title='', xlabel='', ylabel='', style=None): self._style = style or {} self._title = title self._xlabel = xlabel self._ylabel = ylabel self.xdata = [] self.ydata = [] self.width = 0 self.height = 0 @property def style(self): return self._style @property def title(self): return self._title @property def xlabel(self): return self._xlabel @property def ylabel(self): return self._ylabel class State: def __init__(self): self.running = False class Window: def __init__(self, refresh=0.5): self.refresh = refresh self.thread = None self.state = State() self.figure = plt.figure() self.interfaces = [] plt.ion() plt.show() def register(self, position, interface): axis = self.figure.add_subplot( position, title=interface.title, xlabel=interface.xlabel, ylabel=interface.ylabel) axis.get_xaxis().set_ticks([]) line, = axis.plot(interface.xdata, interface.ydata, **interface.style) self.interfaces.append((axis, line, interface)) def start(self, work): """ Hand the main thread to the window and continue work in the provided function. A state is passed as the first argument that contains a `running` flag. The function is expected to exit if the flag becomes false. The flag can also be set to false to stop the window event loop and continue in the main thread after the `start()` call. """ assert threading.current_thread() == threading.main_thread() assert not self.state.running self.state.running = True self.thread = threading.Thread(target=work, args=(self.state,)) self.thread.start() while self.state.running: try: before = time.time() self.update() duration = time.time() - before plt.pause(max(0.001, self.refresh - duration)) except KeyboardInterrupt: self.state.running = False self.thread.join() return def stop(self): """ Close the window and stops the worker thread. 
The main thread will resume with the next command after the `start()` call. """ assert threading.current_thread() == self.thread assert self.state.running self.state.running = False def update(self): """ Redraw the figure to show changed data. This is automatically called after `start()` was run. """ assert threading.current_thread() == threading.main_thread() for axis, line, interface in self.interfaces: line.set_xdata(interface.xdata) line.set_ydata(interface.ydata) axis.set_xlim(0, interface.width or 1, emit=False) axis.set_ylim(0, interface.height or 1, emit=False) self.figure.canvas.draw() class Plot(Interface): def __init__(self, title, xlabel, ylabel, style=None, fixed=None): # pylint: disable=too-many-arguments, redefined-variable-type super().__init__(title, xlabel, ylabel, style or {}) self.max_ = 0 if not fixed: self.xdata = [] self.ydata = [] else: self.xdata = list(range(fixed)) self.ydata = collections.deque([None] * fixed, maxlen=fixed) self.width = fixed def __call__(self, values): self.ydata += values self.max_ = max(self.max_, *values) self.height = 1.05 * self.max_ while len(self.xdata) < len(self.ydata): self.xdata.append(len(self.xdata)) self.width = len(self.xdata) - 1 assert len(self.xdata) == len(self.ydata) ================================================ FILE: layered/problem.py ================================================ import os import yaml import layered.cost import layered.dataset import layered.activation from layered.network import Layer class Problem: def __init__(self, content=None): """ Construct a problem. If content is specified, try to load it as a YAML path and otherwise treat it as an inline YAML string. 
""" if content and os.path.isfile(content): with open(content) as file_: self.parse(file_) elif content: self.parse(content) self._validate() def __str__(self): keys = self.__dict__.keys() & self._defaults().keys() return str({x: getattr(self, x) for x in keys}) def parse(self, definition): definition = yaml.load(definition) self._load_definition(definition) self._load_symbols() self._load_layers() self._load_weight_tying() assert not definition, ( 'unknown properties {} in problem definition' .format(', '.join(definition.keys()))) def _load_definition(self, definition): # The empty dictionary causes defaults to be loaded even if the # definition is None. if not definition: definition = {} for name, default in self._defaults().items(): type_ = type(default) self.__dict__[name] = type_(definition.pop(name, default)) def _load_symbols(self): # pylint: disable=attribute-defined-outside-init self.cost = self._find_symbol(layered.cost, self.cost)() self.dataset = self._find_symbol(layered.dataset, self.dataset)() def _load_layers(self): for index, layer in enumerate(self.layers): size, activation = layer.pop('size'), layer.pop('activation') activation = self._find_symbol(layered.activation, activation) self.layers[index] = Layer(size, activation) def _load_weight_tying(self): # pylint: disable=attribute-defined-outside-init self.weight_tying = [[y.split(',') for y in x] for x in self.weight_tying] for i, group in enumerate(self.weight_tying): for j, slices in enumerate(group): for k, slice_ in enumerate(slices): slice_ = [int(s) if s else None for s in slice_.split(':')] self.weight_tying[i][j][k] = slice(*slice_) for i, group in enumerate(self.weight_tying): for j, slices in enumerate(group): assert not slices[0].start and not slices[0].step, ( 'Ranges are not allowed in the first dimension.') self.weight_tying[i][j][0] = slices[0].stop def _find_symbol(self, module, name, fallback=None): """ Find the symbol of the specified name inside the module or raise an 
exception. """ if not hasattr(module, name) and fallback: return self._find_symbol(module, fallback, None) return getattr(module, name) def _validate(self): num_input = len(self.dataset.training[0].data) num_output = len(self.dataset.training[0].target) if self.layers: assert self.layers[0].size == num_input, ( 'the size of the input layer must match the training data') assert self.layers[-1].size == num_output, ( 'the size of the output layer must match the training labels') @staticmethod def _defaults(): return { 'cost': 'SquaredError', 'dataset': 'Modulo', 'layers': [], 'epochs': 1, 'batch_size': 1, 'learning_rate': 0.1, 'momentum': 0.0, 'weight_scale': 0.1, 'weight_mean': 0.0, 'weight_decay': 0.0, 'weight_tying': [], 'evaluate_every': 1000, } ================================================ FILE: layered/trainer.py ================================================ import functools import numpy as np from layered.gradient import BatchBackprop, CheckedBackprop from layered.network import Network, Matrices from layered.optimization import ( GradientDecent, Momentum, WeightDecay, WeightTying) from layered.utility import repeated, batched from layered.evaluation import compute_costs, compute_error class Trainer: # pylint: disable=attribute-defined-outside-init, too-many-arguments def __init__(self, problem, load=None, save=None, visual=False, check=False): self.problem = problem self.load = load self.save = save self.visual = visual self.check = check self._init_network() self._init_training() self._init_visualize() def _init_network(self): """Define model and initialize weights.""" self.network = Network(self.problem.layers) self.weights = Matrices(self.network.shapes) if self.load: loaded = np.load(self.load) assert loaded.shape == self.weights.shape, ( 'weights to load must match problem definition') self.weights.flat = loaded else: self.weights.flat = np.random.normal( self.problem.weight_mean, self.problem.weight_scale, len(self.weights.flat)) def 
_init_training(self): # pylint: disable=redefined-variable-type """Classes needed during training.""" if self.check: self.backprop = CheckedBackprop(self.network, self.problem.cost) else: self.backprop = BatchBackprop(self.network, self.problem.cost) self.momentum = Momentum() self.decent = GradientDecent() self.decay = WeightDecay() self.tying = WeightTying(*self.problem.weight_tying) self.weights = self.tying(self.weights) def _init_visualize(self): if not self.visual: return from layered.plot import Window, Plot self.plot_training = Plot( 'Training', 'Examples', 'Cost', fixed=1000, style={'linestyle': '', 'marker': '.'}) self.plot_testing = Plot('Testing', 'Time', 'Error') self.window = Window() self.window.register(211, self.plot_training) self.window.register(212, self.plot_testing) def __call__(self): """Train the model and visualize progress.""" print('Start training') repeats = repeated(self.problem.dataset.training, self.problem.epochs) batches = batched(repeats, self.problem.batch_size) if self.visual: self.window.start(functools.partial(self._train_visual, batches)) else: self._train(batches) def _train(self, batches): for index, batch in enumerate(batches): try: self._batch(index, batch) except KeyboardInterrupt: print('\nAborted') return print('Done') def _train_visual(self, batches, state): for index, batch in enumerate(batches): if not state.running: print('\nAborted') return self._batch(index, batch) print('Done') input('Press any key to close window') state.running = False def _batch(self, index, batch): if self.check: assert len(batch) == 1 gradient = self.backprop(self.weights, batch[0]) else: gradient = self.backprop(self.weights, batch) gradient = self.momentum(gradient, self.problem.momentum) gradient = self.tying(gradient) self.weights = self.decent( self.weights, gradient, self.problem.learning_rate) self.weights = self.decay(self.weights, self.problem.weight_decay) self._visualize(batch) self._evaluate(index) def _visualize(self, batch): if 
not self.visual: return costs = compute_costs( self.network, self.weights, self.problem.cost, batch) self.plot_training(costs) def _evaluate(self, index): if not self._every(self.problem.evaluate_every, self.problem.batch_size, index): return if self.save: np.save(self.save, self.weights) error = compute_error( self.network, self.weights, self.problem.dataset.testing) print('Batch {} test error {:.2f}%'.format(index, 100 * error)) if self.visual: self.plot_testing([error]) @staticmethod def _every(times, step_size, index): """ Given a loop over batches of an iterable and an operation that should be performed every few elements. Determine whether the operation should be called for the current index. """ current = index * step_size step = current // times * times reached = current >= step overshot = current >= step + step_size return current and reached and not overshot ================================================ FILE: layered/utility.py ================================================ import os import errno import functools import itertools def repeated(iterable, times): for _ in range(times): yield from iterable def batched(iterable, size): batch = [] for element in iterable: batch.append(element) if len(batch) == size: yield batch batch = [] if batch: yield batch def averaged(callable_, batch): overall = None for element in batch: current = callable_(element) overall = overall + current if overall else current return overall / len(batch) def listify(fn=None, wrapper=list): """ From http://stackoverflow.com/a/12377059/1079110 """ def listify_return(fn): @functools.wraps(fn) def listify_helper(*args, **kw): return wrapper(fn(*args, **kw)) return listify_helper if fn is None: return listify_return return listify_return(fn) def ensure_folder(path): try: os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST: return raise def hstack_lines(blocks, sep=' '): blocks = [x.split('\n') for x in blocks] height = max(len(block) for block in blocks) widths = 
[max(len(line) for line in block) for block in blocks] output = '' for y in range(height): for x, w in enumerate(widths): cell = blocks[x][y] if y < len(blocks[x]) else '' output += cell.rjust(w, ' ') + sep output += '\n' return output def pairwise(iterable): a, b = itertools.tee(iterable) next(b, None) return zip(a, b) ================================================ FILE: problem/mnist-relu-batch.yaml ================================================ # 2.12% dataset: Mnist cost: CrossEntropy layers: - activation: Identity size: 784 - activation: Relu size: 700 - activation: Relu size: 700 - activation: Relu size: 400 - activation: Softmax size: 10 epochs: 5 batch_size: 32 learning_rate: 0.01 momentum: 0.9 weight_scale: 0.01 weight_decay: 0 evaluate_every: 5000 ================================================ FILE: problem/mnist-relu-online.yaml ================================================ # 2.59% dataset: Mnist cost: CrossEntropy layers: - activation: Identity size: 784 - activation: Relu size: 700 - activation: Relu size: 400 - activation: Softmax size: 10 epochs: 2 learning_rate: 0.001 momentum: 0 weight_scale: 0.01 weight_decay: 0 evaluate_every: 5000 ================================================ FILE: problem/modulo.yaml ================================================ dataset: Modulo cost: CrossEntropy layers: - activation: Identity size: 32 - activation: Max size: 64 - activation: Max size: 64 - activation: Softmax size: 7 epochs: 5 learning_rate: 0.01 weight_scale: 0.1 evaluate_every: 5000 ================================================ FILE: problem/sparse-field-batch.yaml ================================================ # 8.57% dataset: Mnist cost: CrossEntropy layers: - activation: Identity size: 784 - activation: SparseField size: 625 - activation: SparseField size: 625 - activation: Softmax size: 10 epochs: 5 learning_rate: 0.1 momentum: 0.9 batch_size: 100 weight_scale: 0.001 weight_mean: 0.001 weight_decay: 0 evaluate_every: 5000 
================================================ FILE: problem/sparse-field-online.yaml ================================================ # 6.42% dataset: Mnist cost: CrossEntropy layers: - activation: Identity size: 784 - activation: SparseField size: 625 - activation: SparseField size: 625 - activation: Softmax size: 10 epochs: 5 learning_rate: 0.01 momentum: 0.0 batch_size: 1 weight_scale: 0.001 weight_mean: 0.002 weight_decay: 0 evaluate_every: 5000 ================================================ FILE: problem/sparse-max.yaml ================================================ # 15.83% dataset: Mnist cost: CrossEntropy layers: - activation: Identity size: 784 - activation: SparseRange size: 1000 - activation: SparseRange size: 1000 - activation: Softmax size: 10 epochs: 10 learning_rate: 0.01 momentum: 0 batch_size: 1 weight_scale: 0.001 weight_mean: 0 weight_decay: 0 evaluate_every: 5000 ================================================ FILE: problem/tying.yaml ================================================ dataset: Modulo cost: CrossEntropy layers: - activation: Identity size: 32 - activation: Relu size: 64 - activation: Relu size: 64 - activation: Relu size: 64 - activation: Softmax size: 7 epochs: 1 learning_rate: 0.010 weight_scale: 0.1 # Tie together the two weight matrices # between the three Relu layers. 
weight_tying: - ['1,:,:', '2,:,:'] evaluate_every: 5000 ================================================ FILE: pylintrc ================================================ [MESSAGES CONTROL] disable= locally-disabled, too-many-instance-attributes, missing-docstring, fixme, too-few-public-methods, invalid-name, no-member, redefined-outer-name [REPORTS] reports=no [BASIC] docstring-min-length=2 ================================================ FILE: setup.py ================================================ import os import sys import subprocess import setuptools from setuptools.command.build_ext import build_ext from setuptools.command.test import test class TestCommand(test): description = 'run tests, linters and create a coverage report' user_options = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.returncode = 0 def finalize_options(self): super().finalize_options() # New setuptools don't need this anymore, thus the try block. try: # pylint: disable=attribute-defined-outside-init self.test_args = [] self.test_suite = 'True' except AttributeError: pass def run_tests(self): self._call('python -m pytest --cov=layered test') self._call('python -m pylint layered') self._call('python -m pylint test') self._call('python -m pylint setup.py') self._check() def _call(self, command): env = os.environ.copy() env['PYTHONPATH'] = ''.join(':' + x for x in sys.path) print('Run command', command) try: subprocess.check_call(command.split(), env=env) except subprocess.CalledProcessError as error: print('Command failed with exit code', error.returncode) self.returncode = 1 def _check(self): if self.returncode: sys.exit(self.returncode) class BuildExtCommand(build_ext): """ Fix Numpy build error when bundled as a dependency. 
From http://stackoverflow.com/a/21621689/1079110 """ def finalize_options(self): super().finalize_options() __builtins__.__NUMPY_SETUP__ = False import numpy self.include_dirs.append(numpy.get_include()) DESCRIPTION = 'Clean reference implementation of feed forward neural networks' SETUP_REQUIRES = [ 'numpy', 'sphinx', ] INSTALL_REQUIRES = [ 'PyYAML', 'numpy', 'matplotlib', ] TESTS_REQUIRE = [ 'pytest', 'pytest-cov', 'pylint', ] if __name__ == '__main__': setuptools.setup( name='layered', version='0.1.8', description=DESCRIPTION, url='http://github.com/danijar/layered', author='Danijar Hafner', author_email='mail@danijar.com', license='MIT', packages=['layered'], setup_requires=SETUP_REQUIRES, install_requires=INSTALL_REQUIRES, tests_require=TESTS_REQUIRE, cmdclass={'test': TestCommand, 'build_ext': BuildExtCommand}, entry_points={'console_scripts': ['layered=layered.__main__:main']}, ) ================================================ FILE: test/__init__.py ================================================ ================================================ FILE: test/fixtures.py ================================================ import numpy as np import pytest from layered.activation import Identity, Relu, Sigmoid, Softmax from layered.cost import SquaredError, CrossEntropy from layered.network import Matrices, Layer, Network from layered.utility import pairwise from layered.example import Example def random_matrices(shapes): np.random.seed(0) matrix = Matrices(shapes) matrix.flat = np.random.normal(0, 0.1, len(matrix.flat)) return matrix @pytest.fixture(params=[(5, 5, 6, 3)]) def weights(request): shapes = list(pairwise(request.param)) weights = random_matrices(shapes) return weights @pytest.fixture(params=[(5, 5, 6, 3)]) def weights_and_gradient(request): shapes = list(pairwise(request.param)) weights = random_matrices(shapes) gradient = random_matrices(shapes) return weights, gradient @pytest.fixture(params=[Identity, Relu, Sigmoid, Softmax]) def 
network_and_weights(request): np.random.seed(0) layers = [Layer(5, Identity)] + [Layer(5, request.param) for _ in range(3)] network = Network(layers) weights = Matrices(network.shapes) weights.flat = np.random.normal(0, 0.01, len(weights.flat)) return network, weights @pytest.fixture def example(): data = np.array(range(5)) label = np.array(range(5)) return Example(data, label) @pytest.fixture def examples(): examples = [] for i in range(7): data = np.array(range(5)) + i label = np.array(range(5)) + i examples.append(Example(data, label)) return examples @pytest.fixture(params=[SquaredError, CrossEntropy]) def cost(request): return request.param() ================================================ FILE: test/test_example.py ================================================ # pylint: disable=no-self-use import numpy as np from layered.example import Example class TestExample: def test_representation(self): data = np.array([1, 2, 3]) target = np.array([1, 2, 3]) example = Example(data, target) repr(example) ================================================ FILE: test/test_gradient.py ================================================ # pylint: disable=no-self-use, wildcard-import, unused-wildcard-import from layered.activation import Identity, Relu from layered.cost import CrossEntropy from layered.gradient import ( NumericalGradient, Backprop, BatchBackprop, ParallelBackprop) from test.fixtures import * class TestBackprop: def test_against_numerical(self, network_and_weights, cost, example): network, weights = network_and_weights if isinstance(cost, CrossEntropy) and isinstance( network.layers[1].activation, (Identity, Relu)): pytest.xfail( 'Cross entropy doesn\'t work with linear activations for some ' 'reason.') backprop = Backprop(network, cost) numerical = NumericalGradient(network, cost) gradient = backprop(weights, example) reference = numerical(weights, example) assert np.allclose(gradient, reference) class TestBatchBackprop: def test_calculation(self, 
network_and_weights, cost, examples): network, weights = network_and_weights batched = BatchBackprop(network, cost) backprop = Backprop(network, cost) gradient = batched(weights, examples) reference = sum(backprop(weights, x) for x in examples) / len(examples) assert np.allclose(gradient, reference) class TestParallelBachprop: def test_against_batch_backprop(self, network_and_weights, cost, examples): network, weights = network_and_weights parallel = ParallelBackprop(network, cost) batched = BatchBackprop(network, cost) gradient = parallel(weights, examples) reference = batched(weights, examples) assert np.allclose(gradient, reference) ================================================ FILE: test/test_network.py ================================================ # pylint: disable=no-self-use import numpy as np import pytest from layered.network import Matrices @pytest.fixture def matrices(): return Matrices([(5, 8), (4, 2)]) class TestMatrices: def test_initialization(self, matrices): assert np.array_equal(matrices[0], np.zeros((5, 8))) assert np.array_equal(matrices[1], np.zeros((4, 2))) def test_indexing(self, matrices): for index, matrix in enumerate(matrices): for (x, y), _ in np.ndenumerate(matrix): assert matrices[index][x, y] == matrices[index, x, y] def test_slicing(self, matrices): for index, matrix in enumerate(matrices): assert (matrices[index][:, :] == matrices[index, :, :]).all() assert (matrices[index][:, :] == matrix[:, :]).all() def test_negative_indices(self, matrices): for i in range(len(matrices)): positive = matrices[len(matrices) - i - 1] negative = matrices[i - 1] assert negative.shape == positive.shape assert (negative == positive).all() def test_assignment(self, matrices): matrices[0, 4, 5] = 42 assert matrices[0, 4, 5] == 42 def test_matrix_assignment(self, matrices): np.random.seed(0) matrix = np.random.rand(*matrices.shapes[0]) matrices[0] = matrix assert (matrices[0] == matrix).all() def test_sliced_matrix_assignment(self, matrices): 
np.random.seed(0) matrix = np.random.rand(*matrices.shapes[0]) matrices[0][:, :] = matrix assert (matrices[0] == matrix).all() matrices[0, :, :] = matrix assert (matrices[0] == matrix).all() def test_invalid_matrix_assignment(self, matrices): np.random.seed(0) shape = matrices.shapes[0] matrix = np.random.rand(shape[0] + 1, shape[1]) with pytest.raises(ValueError): matrices[0] = matrix ================================================ FILE: test/test_optimization.py ================================================ # pylint: disable=wildcard-import, unused-wildcard-import, no-self-use import numpy as np import pytest from layered import optimization from test.fixtures import * @pytest.fixture(params=[(1, 1), (1, 2), (2, 1), (2, 2), (4, 5)]) def weights_and_gradient_and_groups(request): size, layers = request.param shapes = [(size, size)] * layers weights = random_matrices(shapes) gradient = random_matrices(shapes) slices = [np.s_[i, :, :] for i, x in enumerate(weights)] groups = (slices,) return weights, gradient, groups class TestGradientDecent: def test_calculation(self, weights_and_gradient): weights, gradient = weights_and_gradient decent = optimization.GradientDecent() updated = decent(weights, gradient, 0.1) reference = weights - 0.1 * gradient assert np.allclose(updated, reference) def test_shapes_match(self, weights_and_gradient): weights, gradient = weights_and_gradient decent = optimization.GradientDecent() updated = decent(weights, gradient, 0.1) assert weights.shapes == updated.shapes def test_copy_data(self, weights_and_gradient): weights, gradient = weights_and_gradient decent = optimization.GradientDecent() before = weights.copy() updated = decent(weights, gradient, 0.1) assert (before.flat == weights.flat).all() assert updated.flat[0] != 42 weights.flat[0] = 42 assert updated.flat[0] != 42 class TestMomentum: def test_zero_rate(self, weights_and_gradient): _, gradient = weights_and_gradient original = gradient momentum = optimization.Momentum() for _ 
in range(5): gradient = momentum(gradient, rate=0) assert np.allclose(gradient, original) def test_shapes_match(self, weights): momentum = optimization.Momentum() updated = momentum(weights, 0.9) assert weights.shapes == updated.shapes def test_copy_data(self, weights): momentum = optimization.Momentum() before = weights.copy() updated = momentum(weights, 0.1) assert (before.flat == weights.flat).all() assert updated.flat[0] != 42 weights.flat[0] = 42 assert updated.flat[0] != 42 class TestWeightDecay: def test_calculation(self, weights): decay = optimization.WeightDecay() updated = decay(weights, 0.1) reference = 0.9 * weights assert np.allclose(updated, reference) def test_shapes_match(self, weights): decay = optimization.WeightDecay() updated = decay(weights, 0.1) assert weights.shapes == updated.shapes def test_copy_data(self, weights): decay = optimization.WeightDecay() before = weights.copy() updated = decay(weights, 0.1) assert (before.flat == weights.flat).all() assert updated.flat[0] != 42 weights.flat[0] = 42 assert updated.flat[0] != 42 class TestWeightTying: def test_calculation(self, weights_and_gradient_and_groups): weights, _, groups = weights_and_gradient_and_groups tying = optimization.WeightTying(*groups) updated = tying(weights) self._is_tied(updated, groups) def test_shapes_match(self, weights_and_gradient_and_groups): weights, _, groups = weights_and_gradient_and_groups tying = optimization.WeightTying(*groups) updated = tying(weights) assert weights.shapes == updated.shapes def test_dont_affect_others(self, weights_and_gradient_and_groups): weights, _, _ = weights_and_gradient_and_groups if len(weights.shapes) < 2: pytest.skip() group = (np.s_[0, :, :], np.s_[1, :, :]) tying = optimization.WeightTying(group) updated = tying(weights) assert (updated[0] == updated[1]).all() for before, after in zip(weights[2:], updated[2:]): assert (before == after).all() def test_weights_stay_tied(self, weights_and_gradient_and_groups): weights, gradient, 
groups = weights_and_gradient_and_groups tying = optimization.WeightTying(*groups) decent = optimization.GradientDecent() weights = tying(weights) weights = decent(weights, gradient, 0.1) self._is_tied(weights, groups) def test_copy_data(self, weights_and_gradient_and_groups): weights, _, groups = weights_and_gradient_and_groups tying = optimization.WeightTying(*groups) before = weights.copy() updated = tying(weights) assert (before.flat == weights.flat).all() assert updated.flat[0] != 42 weights.flat[0] = 42 assert updated.flat[0] != 42 def _is_tied(self, matrices, groups): for group in groups: slices = [matrices[x] for x in group] assert [np.allclose(x, slices[0]) for x in slices] ================================================ FILE: test/test_plot.py ================================================ # pylint: disable=no-self-use class TestPlot: def test_interactive_backend(self): import matplotlib matplotlib.use('TkAgg') ================================================ FILE: test/test_problem.py ================================================ # pylint: disable=no-self-use import pytest from layered.problem import Problem class TestProblem: def test_unknown_property(self): with pytest.raises(Exception): Problem('foo: 42') def test_incompatible_type(self): with pytest.raises(Exception): Problem('learning_rate: foo') def test_read_value(self): problem = Problem('learning_rate: 0.4') assert problem.learning_rate == 0.4 def test_default_value(self): problem = Problem(' ') print(problem) assert problem.learning_rate == 0.1 ================================================ FILE: test/test_trainer.py ================================================ # pylint: disable=no-self-use import pytest from layered.trainer import Trainer from layered.problem import Problem @pytest.fixture def problem(): return Problem( """ dataset: Test layers: - activation: Identity size: 3 """) class TestTrainer: def test_no_crash(self, problem): trainer = Trainer(problem) trainer() 
================================================ FILE: test/test_utility.py ================================================ # pylint: disable=no-self-use import random from layered.utility import repeated, batched, averaged class MockGenerator: def __init__(self, data): self.data = data self.evaluated = 0 def __iter__(self): for element in self.data: self.evaluated += 1 yield element class MockCustomOperators: def __init__(self, value): self.value = value def __add__(self, other): return MockCustomOperators(self.value + other.value) __radd__ = __add__ def __truediv__(self, other): return MockCustomOperators(self.value / other) class TestRepeated: def test_result(self): iterable = range(14) repeats = repeated(iterable, 3) assert list(repeats) == list(iterable) * 3 def test_generator(self): iterable = MockGenerator([1, 2, 3]) repeats = repeated(iterable, 3) assert iterable.evaluated == 0 list(repeats) assert iterable.evaluated == 3 * 3 class TestBatched: def test_result(self): # pylint: disable=redefined-variable-type iterable = range(14) batches = batched(iterable, 3) batches = list(batches) assert len(batches) == 5 assert len(batches[0]) == 3 assert len(batches[-1]) == 2 def test_generator(self): iterable = MockGenerator([1, 2, 3]) batches = batched(iterable, 3) assert iterable.evaluated == 0 list(batches) assert iterable.evaluated == 3 class TestAveraged: def test_result(self): assert averaged(lambda x: x, [1, 2, 3, 4]) == 2.5 assert averaged(lambda x: x ** 2, [1, 2, 3, 4]) == 7.5 def test_custom_operators(self): iterable = [MockCustomOperators(i) for i in range(1, 5)] assert averaged(lambda x: x, iterable).value == 2.5 def test_supports_booleans(self): iterable = [True] * 5 + [False] * 5 random.shuffle(iterable) assert averaged(lambda x: x, iterable) == 0.5