Repository: eriklindernoren/ML-From-Scratch Branch: master Commit: a2806c6732ee Files: 89 Total size: 265.9 KB Directory structure: gitextract_ltomqai7/ ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── mlfromscratch/ │ ├── __init__.py │ ├── data/ │ │ └── TempLinkoping2016.txt │ ├── deep_learning/ │ │ ├── __init__.py │ │ ├── activation_functions.py │ │ ├── layers.py │ │ ├── loss_functions.py │ │ ├── neural_network.py │ │ └── optimizers.py │ ├── examples/ │ │ ├── adaboost.py │ │ ├── apriori.py │ │ ├── bayesian_regression.py │ │ ├── convolutional_neural_network.py │ │ ├── dbscan.py │ │ ├── decision_tree_classifier.py │ │ ├── decision_tree_regressor.py │ │ ├── deep_q_network.py │ │ ├── demo.py │ │ ├── elastic_net.py │ │ ├── fp_growth.py │ │ ├── gaussian_mixture_model.py │ │ ├── genetic_algorithm.py │ │ ├── gradient_boosting_classifier.py │ │ ├── gradient_boosting_regressor.py │ │ ├── k_means.py │ │ ├── k_nearest_neighbors.py │ │ ├── lasso_regression.py │ │ ├── linear_discriminant_analysis.py │ │ ├── linear_regression.py │ │ ├── logistic_regression.py │ │ ├── multi_class_lda.py │ │ ├── multilayer_perceptron.py │ │ ├── naive_bayes.py │ │ ├── neuroevolution.py │ │ ├── particle_swarm_optimization.py │ │ ├── partitioning_around_medoids.py │ │ ├── perceptron.py │ │ ├── polynomial_regression.py │ │ ├── principal_component_analysis.py │ │ ├── random_forest.py │ │ ├── recurrent_neural_network.py │ │ ├── restricted_boltzmann_machine.py │ │ ├── ridge_regression.py │ │ ├── support_vector_machine.py │ │ └── xgboost.py │ ├── reinforcement_learning/ │ │ ├── __init__.py │ │ └── deep_q_network.py │ ├── supervised_learning/ │ │ ├── __init__.py │ │ ├── adaboost.py │ │ ├── bayesian_regression.py │ │ ├── decision_tree.py │ │ ├── gradient_boosting.py │ │ ├── k_nearest_neighbors.py │ │ ├── linear_discriminant_analysis.py │ │ ├── logistic_regression.py │ │ ├── multi_class_lda.py │ │ ├── multilayer_perceptron.py │ │ ├── naive_bayes.py │ │ ├── neuroevolution.py │ │ ├── 
particle_swarm_optimization.py │ │ ├── perceptron.py │ │ ├── random_forest.py │ │ ├── regression.py │ │ ├── support_vector_machine.py │ │ └── xgboost.py │ ├── unsupervised_learning/ │ │ ├── __init__.py │ │ ├── apriori.py │ │ ├── autoencoder.py │ │ ├── dbscan.py │ │ ├── dcgan.py │ │ ├── fp_growth.py │ │ ├── gaussian_mixture_model.py │ │ ├── generative_adversarial_network.py │ │ ├── genetic_algorithm.py │ │ ├── k_means.py │ │ ├── partitioning_around_medoids.py │ │ ├── principal_component_analysis.py │ │ └── restricted_boltzmann_machine.py │ └── utils/ │ ├── __init__.py │ ├── data_manipulation.py │ ├── data_operation.py │ ├── kernels.py │ └── misc.py ├── requirements.txt ├── setup.cfg └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *~ \.DS_STORE build/ dist/ *egg-info* *__pycache__/ *.py[cod] *eggs* *\.png ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2017 Erik Linder-Norén Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MANIFEST.in ================================================ recursive-include mlfs.data * ================================================ FILE: README.md ================================================ # Machine Learning From Scratch ## About Python implementations of some of the fundamental Machine Learning models and algorithms from scratch. The purpose of this project is not to produce as optimized and computationally efficient algorithms as possible but rather to present the inner workings of them in a transparent and accessible way. ## Table of Contents - [Machine Learning From Scratch](#machine-learning-from-scratch) * [About](#about) * [Table of Contents](#table-of-contents) * [Installation](#installation) * [Examples](#examples) + [Polynomial Regression](#polynomial-regression) + [Classification With CNN](#classification-with-cnn) + [Density-Based Clustering](#density-based-clustering) + [Generating Handwritten Digits](#generating-handwritten-digits) + [Deep Reinforcement Learning](#deep-reinforcement-learning) + [Image Reconstruction With RBM](#image-reconstruction-with-rbm) + [Evolutionary Evolved Neural Network](#evolutionary-evolved-neural-network) + [Genetic Algorithm](#genetic-algorithm) + [Association Analysis](#association-analysis) * [Implementations](#implementations) + [Supervised Learning](#supervised-learning) + [Unsupervised Learning](#unsupervised-learning) + [Reinforcement Learning](#reinforcement-learning) + [Deep Learning](#deep-learning) * [Contact](#contact) ## Installation $ git clone https://github.com/eriklindernoren/ML-From-Scratch $ cd ML-From-Scratch $ python setup.py install ## Examples ### Polynomial Regression $ 
python mlfromscratch/examples/polynomial_regression.py

Figure: Training progress of a regularized polynomial regression model fitting
temperature data measured in Linköping, Sweden, in 2016.

### Classification With CNN $ python mlfromscratch/examples/convolutional_neural_network.py +---------+ | ConvNet | +---------+ Input Shape: (1, 8, 8) +----------------------+------------+--------------+ | Layer Type | Parameters | Output Shape | +----------------------+------------+--------------+ | Conv2D | 160 | (16, 8, 8) | | Activation (ReLU) | 0 | (16, 8, 8) | | Dropout | 0 | (16, 8, 8) | | BatchNormalization | 2048 | (16, 8, 8) | | Conv2D | 4640 | (32, 8, 8) | | Activation (ReLU) | 0 | (32, 8, 8) | | Dropout | 0 | (32, 8, 8) | | BatchNormalization | 4096 | (32, 8, 8) | | Flatten | 0 | (2048,) | | Dense | 524544 | (256,) | | Activation (ReLU) | 0 | (256,) | | Dropout | 0 | (256,) | | BatchNormalization | 512 | (256,) | | Dense | 2570 | (10,) | | Activation (Softmax) | 0 | (10,) | +----------------------+------------+--------------+ Total Parameters: 538570 Training: 100% [------------------------------------------------------------------------] Time: 0:01:55 Accuracy: 0.987465181058

Figure: Classification of the digit dataset using CNN.

### Density-Based Clustering $ python mlfromscratch/examples/dbscan.py

Figure: Clustering of the moons dataset using DBSCAN.

### Generating Handwritten Digits $ python mlfromscratch/unsupervised_learning/generative_adversarial_network.py +-----------+ | Generator | +-----------+ Input Shape: (100,) +------------------------+------------+--------------+ | Layer Type | Parameters | Output Shape | +------------------------+------------+--------------+ | Dense | 25856 | (256,) | | Activation (LeakyReLU) | 0 | (256,) | | BatchNormalization | 512 | (256,) | | Dense | 131584 | (512,) | | Activation (LeakyReLU) | 0 | (512,) | | BatchNormalization | 1024 | (512,) | | Dense | 525312 | (1024,) | | Activation (LeakyReLU) | 0 | (1024,) | | BatchNormalization | 2048 | (1024,) | | Dense | 803600 | (784,) | | Activation (TanH) | 0 | (784,) | +------------------------+------------+--------------+ Total Parameters: 1489936 +---------------+ | Discriminator | +---------------+ Input Shape: (784,) +------------------------+------------+--------------+ | Layer Type | Parameters | Output Shape | +------------------------+------------+--------------+ | Dense | 401920 | (512,) | | Activation (LeakyReLU) | 0 | (512,) | | Dropout | 0 | (512,) | | Dense | 131328 | (256,) | | Activation (LeakyReLU) | 0 | (256,) | | Dropout | 0 | (256,) | | Dense | 514 | (2,) | | Activation (Softmax) | 0 | (2,) | +------------------------+------------+--------------+ Total Parameters: 533762

Figure: Training progress of a Generative Adversarial Network generating
handwritten digits.

### Deep Reinforcement Learning $ python mlfromscratch/examples/deep_q_network.py +----------------+ | Deep Q-Network | +----------------+ Input Shape: (4,) +-------------------+------------+--------------+ | Layer Type | Parameters | Output Shape | +-------------------+------------+--------------+ | Dense | 320 | (64,) | | Activation (ReLU) | 0 | (64,) | | Dense | 130 | (2,) | +-------------------+------------+--------------+ Total Parameters: 450

Figure: Deep Q-Network solution to the CartPole-v1 environment in OpenAI gym.

### Image Reconstruction With RBM $ python mlfromscratch/examples/restricted_boltzmann_machine.py

Figure: How the network improves during training at reconstructing
the digit 2 from the MNIST dataset.

### Evolutionary Evolved Neural Network $ python mlfromscratch/examples/neuroevolution.py +---------------+ | Model Summary | +---------------+ Input Shape: (64,) +----------------------+------------+--------------+ | Layer Type | Parameters | Output Shape | +----------------------+------------+--------------+ | Dense | 1040 | (16,) | | Activation (ReLU) | 0 | (16,) | | Dense | 170 | (10,) | | Activation (Softmax) | 0 | (10,) | +----------------------+------------+--------------+ Total Parameters: 1210 Population Size: 100 Generations: 3000 Mutation Rate: 0.01 [0 Best Individual - Fitness: 3.08301, Accuracy: 10.5%] [1 Best Individual - Fitness: 3.08746, Accuracy: 12.0%] ... [2999 Best Individual - Fitness: 94.08513, Accuracy: 98.5%] Test set accuracy: 96.7%

Figure: Classification of the digit dataset by a neural network that has
been evolved with an evolutionary algorithm.

### Genetic Algorithm $ python mlfromscratch/examples/genetic_algorithm.py +--------+ | GA | +--------+ Description: Implementation of a Genetic Algorithm which aims to produce the user specified target string. This implementation calculates each candidate's fitness based on the alphabetical distance between the candidate and the target. A candidate is selected as a parent with probabilities proportional to the candidate's fitness. Reproduction is implemented as a single-point crossover between pairs of parents. Mutation is done by randomly assigning new characters with uniform probability. Parameters ---------- Target String: 'Genetic Algorithm' Population Size: 100 Mutation Rate: 0.05 [0 Closest Candidate: 'CJqlJguPlqzvpoJmb', Fitness: 0.00] [1 Closest Candidate: 'MCxZxdr nlfiwwGEk', Fitness: 0.01] [2 Closest Candidate: 'MCxZxdm nlfiwwGcx', Fitness: 0.01] [3 Closest Candidate: 'SmdsAklMHn kBIwKn', Fitness: 0.01] [4 Closest Candidate: ' lotneaJOasWfu Z', Fitness: 0.01] ... [292 Closest Candidate: 'GeneticaAlgorithm', Fitness: 1.00] [293 Closest Candidate: 'GeneticaAlgorithm', Fitness: 1.00] [294 Answer: 'Genetic Algorithm'] ### Association Analysis $ python mlfromscratch/examples/apriori.py +-------------+ | Apriori | +-------------+ Minimum Support: 0.25 Minimum Confidence: 0.8 Transactions: [1, 2, 3, 4] [1, 2, 4] [1, 2] [2, 3, 4] [2, 3] [3, 4] [2, 4] Frequent Itemsets: [1, 2, 3, 4, [1, 2], [1, 4], [2, 3], [2, 4], [3, 4], [1, 2, 4], [2, 3, 4]] Rules: 1 -> 2 (support: 0.43, confidence: 1.0) 4 -> 2 (support: 0.57, confidence: 0.8) [1, 4] -> 2 (support: 0.29, confidence: 1.0) ## Implementations ### Supervised Learning - [Adaboost](mlfromscratch/supervised_learning/adaboost.py) - [Bayesian Regression](mlfromscratch/supervised_learning/bayesian_regression.py) - [Decision Tree](mlfromscratch/supervised_learning/decision_tree.py) - [Elastic Net](mlfromscratch/supervised_learning/regression.py) - [Gradient Boosting](mlfromscratch/supervised_learning/gradient_boosting.py) 
- [K Nearest Neighbors](mlfromscratch/supervised_learning/k_nearest_neighbors.py) - [Lasso Regression](mlfromscratch/supervised_learning/regression.py) - [Linear Discriminant Analysis](mlfromscratch/supervised_learning/linear_discriminant_analysis.py) - [Linear Regression](mlfromscratch/supervised_learning/regression.py) - [Logistic Regression](mlfromscratch/supervised_learning/logistic_regression.py) - [Multi-class Linear Discriminant Analysis](mlfromscratch/supervised_learning/multi_class_lda.py) - [Multilayer Perceptron](mlfromscratch/supervised_learning/multilayer_perceptron.py) - [Naive Bayes](mlfromscratch/supervised_learning/naive_bayes.py) - [Neuroevolution](mlfromscratch/supervised_learning/neuroevolution.py) - [Particle Swarm Optimization of Neural Network](mlfromscratch/supervised_learning/particle_swarm_optimization.py) - [Perceptron](mlfromscratch/supervised_learning/perceptron.py) - [Polynomial Regression](mlfromscratch/supervised_learning/regression.py) - [Random Forest](mlfromscratch/supervised_learning/random_forest.py) - [Ridge Regression](mlfromscratch/supervised_learning/regression.py) - [Support Vector Machine](mlfromscratch/supervised_learning/support_vector_machine.py) - [XGBoost](mlfromscratch/supervised_learning/xgboost.py) ### Unsupervised Learning - [Apriori](mlfromscratch/unsupervised_learning/apriori.py) - [Autoencoder](mlfromscratch/unsupervised_learning/autoencoder.py) - [DBSCAN](mlfromscratch/unsupervised_learning/dbscan.py) - [FP-Growth](mlfromscratch/unsupervised_learning/fp_growth.py) - [Gaussian Mixture Model](mlfromscratch/unsupervised_learning/gaussian_mixture_model.py) - [Generative Adversarial Network](mlfromscratch/unsupervised_learning/generative_adversarial_network.py) - [Genetic Algorithm](mlfromscratch/unsupervised_learning/genetic_algorithm.py) - [K-Means](mlfromscratch/unsupervised_learning/k_means.py) - [Partitioning Around Medoids](mlfromscratch/unsupervised_learning/partitioning_around_medoids.py) - [Principal 
Component Analysis](mlfromscratch/unsupervised_learning/principal_component_analysis.py) - [Restricted Boltzmann Machine](mlfromscratch/unsupervised_learning/restricted_boltzmann_machine.py) ### Reinforcement Learning - [Deep Q-Network](mlfromscratch/reinforcement_learning/deep_q_network.py) ### Deep Learning + [Neural Network](mlfromscratch/deep_learning/neural_network.py) + [Layers](mlfromscratch/deep_learning/layers.py) * Activation Layer * Average Pooling Layer * Batch Normalization Layer * Constant Padding Layer * Convolutional Layer * Dropout Layer * Flatten Layer * Fully-Connected (Dense) Layer * Fully-Connected RNN Layer * Max Pooling Layer * Reshape Layer * Up Sampling Layer * Zero Padding Layer + Model Types * [Convolutional Neural Network](mlfromscratch/examples/convolutional_neural_network.py) * [Multilayer Perceptron](mlfromscratch/examples/multilayer_perceptron.py) * [Recurrent Neural Network](mlfromscratch/examples/recurrent_neural_network.py) ## Contact If there's some implementation you would like to see here or if you're just feeling social, feel free to [email](mailto:eriklindernoren@gmail.com) me or connect with me on [LinkedIn](https://www.linkedin.com/in/eriklindernoren/). 
================================================ FILE: mlfromscratch/__init__.py ================================================ ================================================ FILE: mlfromscratch/data/TempLinkoping2016.txt ================================================ time temp 0.00273224 0.1 0.005464481 -4.5 0.008196721 -6.3 0.010928962 -9.6 0.013661202 -9.9 0.016393443 -17.1 0.019125683 -11.6 0.021857923 -6.2 0.024590164 -6.4 0.027322404 -0.5 0.030054645 0.5 0.032786885 -2.4 0.035519126 -7.5 0.038251366 -16.8 0.040983607 -16.6 0.043715847 -14.6 0.046448087 -9.6 0.049180328 -5.8 0.051912568 -8.6 0.054644809 -9.0 0.057377049 -9.7 0.06010929 -6.9 0.06284153 -3.9 0.06557377 1.4 0.068306011 1.9 0.071038251 4.3 0.073770492 6.9 0.076502732 4.3 0.079234973 5.9 0.081967213 3.8 0.084699454 1.5 0.087431694 0.1 0.090163934 4.6 0.092896175 0.8 0.095628415 -0.5 0.098360656 -1.0 0.101092896 4.2 0.103825137 6.6 0.106557377 4.8 0.109289617 4.7 0.112021858 1.3 0.114754098 0.9 0.117486339 -2.8 0.120218579 -3.3 0.12295082 -5.3 0.12568306 -6.8 0.128415301 -5.1 0.131147541 -2.6 0.133879781 -0.5 0.136612022 -0.5 0.139344262 0.1 0.142076503 1.7 0.144808743 2.4 0.147540984 -0.9 0.150273224 -1.3 0.153005464 -1.4 0.155737705 -0.1 0.158469945 -0.7 0.161202186 -2.6 0.163934426 -4.1 0.166666667 -2.7 0.169398907 0.7 0.172131148 2.0 0.174863388 1.7 0.177595628 0.9 0.180327869 0.3 0.183060109 0.9 0.18579235 1.1 0.18852459 0.1 0.191256831 -0.9 0.193989071 0.2 0.196721311 0.1 0.199453552 1.0 0.202185792 3.4 0.204918033 5.2 0.207650273 4.9 0.210382514 4.9 0.213114754 2.2 0.215846995 2.9 0.218579235 5.3 0.221311475 3.7 0.224043716 3.4 0.226775956 2.1 0.229508197 1.8 0.232240437 4.3 0.234972678 7.0 0.237704918 7.7 0.240437158 6.2 0.243169399 7.5 0.245901639 4.9 0.24863388 4.4 0.25136612 3.8 0.254098361 6.4 0.256830601 8.0 0.259562842 7.9 0.262295082 8.9 0.265027322 6.6 0.267759563 6.5 0.270491803 5.8 0.273224044 5.6 0.275956284 4.7 0.278688525 5.5 0.281420765 5.5 0.284153005 5.8 0.286885246 5.3 
0.289617486 6.9 0.292349727 5.9 0.295081967 6.1 0.297814208 6.6 0.300546448 6.7 0.303278689 6.5 0.306010929 7.0 0.308743169 5.8 0.31147541 3.0 0.31420765 2.5 0.316939891 2.4 0.319672131 4.3 0.322404372 2.8 0.325136612 3.6 0.327868852 6.8 0.330601093 9.1 0.333333333 8.4 0.336065574 9.3 0.338797814 13.3 0.341530055 10.6 0.344262295 10.5 0.346994536 11.8 0.349726776 14.7 0.352459016 16.2 0.355191257 16.4 0.357923497 16.9 0.360655738 12.3 0.363387978 10.2 0.366120219 11.2 0.368852459 6.1 0.371584699 6.4 0.37431694 6.1 0.37704918 10.4 0.379781421 10.3 0.382513661 11.9 0.385245902 12.9 0.387978142 12.5 0.390710383 17.5 0.393442623 19.9 0.396174863 19.3 0.398907104 11.4 0.401639344 9.7 0.404371585 10.7 0.407103825 13.0 0.409836066 12.4 0.412568306 16.3 0.415300546 19.2 0.418032787 19.2 0.420765027 19.8 0.423497268 19.5 0.426229508 16.6 0.428961749 13.0 0.431693989 12.6 0.43442623 17.6 0.43715847 13.7 0.43989071 11.3 0.442622951 10.2 0.445355191 10.2 0.448087432 11.6 0.450819672 14.2 0.453551913 14.4 0.456284153 17.4 0.459016393 13.1 0.461748634 17.4 0.464480874 15.9 0.467213115 15.9 0.469945355 15.5 0.472677596 16.4 0.475409836 16.7 0.478142077 18.2 0.480874317 20.9 0.483606557 22.2 0.486338798 19.1 0.489071038 16.3 0.491803279 16.6 0.494535519 15.1 0.49726776 14.5 0.5 17.4 0.50273224 16.5 0.505464481 13.7 0.508196721 14.0 0.510928962 14.2 0.513661202 15.6 0.516393443 15.7 0.519125683 15.6 0.521857923 16.2 0.524590164 16.3 0.527322404 18.3 0.530054645 16.6 0.532786885 16.1 0.535519126 15.9 0.538251366 16.0 0.540983607 15.9 0.543715847 16.0 0.546448087 15.7 0.549180328 17.2 0.551912568 19.9 0.554644809 21.0 0.557377049 19.4 0.56010929 20.4 0.56284153 23.1 0.56557377 23.0 0.568306011 19.9 0.571038251 17.6 0.573770492 18.8 0.576502732 17.8 0.579234973 18.6 0.581967213 16.4 0.584699454 15.2 0.587431694 15.3 0.590163934 16.0 0.592896175 18.0 0.595628415 17.7 0.598360656 16.0 0.601092896 16.4 0.603825137 16.7 0.606557377 14.3 0.609289617 12.2 0.612021858 10.0 0.614754098 12.0 
0.617486339 16.2 0.620218579 15.9 0.62295082 14.5 0.62568306 15.3 0.628415301 13.3 0.631147541 14.5 0.633879781 15.5 0.636612022 15.3 0.639344262 17.3 0.642076503 15.3 0.644808743 16.4 0.647540984 17.0 0.650273224 20.2 0.653005464 22.4 0.655737705 18.1 0.658469945 11.6 0.661202186 14.6 0.663934426 13.5 0.666666667 17.9 0.669398907 16.4 0.672131148 15.5 0.674863388 15.9 0.677595628 14.1 0.680327869 13.2 0.683060109 14.5 0.68579235 19.0 0.68852459 18.3 0.691256831 18.8 0.693989071 16.8 0.696721311 16.8 0.699453552 14.3 0.702185792 18.4 0.704918033 18.3 0.707650273 18.4 0.710382514 14.9 0.713114754 11.4 0.715846995 12.6 0.718579235 14.0 0.721311475 14.8 0.724043716 9.9 0.726775956 11.4 0.729508197 12.9 0.732240437 12.1 0.734972678 12.8 0.737704918 13.5 0.740437158 12.9 0.743169399 14.0 0.745901639 14.6 0.74863388 12.0 0.75136612 10.5 0.754098361 9.5 0.756830601 7.6 0.759562842 6.4 0.762295082 7.0 0.765027322 8.1 0.767759563 8.1 0.770491803 7.6 0.773224044 7.4 0.775956284 7.2 0.778688525 7.0 0.781420765 6.4 0.784153005 5.8 0.786885246 5.5 0.789617486 6.4 0.792349727 7.3 0.795081967 7.4 0.797814208 7.8 0.800546448 7.9 0.803278689 6.9 0.806010929 6.1 0.808743169 3.7 0.81147541 5.3 0.81420765 6.1 0.816939891 4.3 0.819672131 3.3 0.822404372 8.8 0.825136612 9.8 0.827868852 6.4 0.830601093 4.6 0.833333333 5.2 0.836065574 5.5 0.838797814 1.4 0.841530055 0.5 0.844262295 -2.6 0.846994536 2.4 0.849726776 -0.8 0.852459016 -3.3 0.855191257 -2.8 0.857923497 -3.5 0.860655738 -2.8 0.863387978 -2.2 0.866120219 -0.3 0.868852459 0.0 0.871584699 2.3 0.87431694 4.9 0.87704918 3.1 0.879781421 3.6 0.882513661 5.2 0.885245902 3.8 0.887978142 3.2 0.890710383 7.7 0.893442623 7.8 0.896174863 6.9 0.898907104 2.7 0.901639344 2.8 0.904371585 6.6 0.907103825 1.9 0.909836066 -1.4 0.912568306 2.2 0.915300546 1.9 0.918032787 -1.3 0.920765027 -1.6 0.923497268 -3.2 0.926229508 -2.7 0.928961749 3.7 0.931693989 -3.2 0.93442623 -0.2 0.93715847 9.3 0.93989071 7.1 0.942622951 3.2 0.945355191 1.1 0.948087432 
import numpy as np

# Collection of activation functions
# Reference: https://en.wikipedia.org/wiki/Activation_function
#
# Every class exposes the same two methods:
#   __call__(x)  -> the activation applied element-wise to x
#   gradient(x)  -> the element-wise derivative evaluated at x


class Sigmoid():
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise."""

    def __call__(self, x):
        x = np.asarray(x)
        # exp() is only ever taken of a non-positive number, so it cannot
        # overflow no matter how large |x| is.
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    def gradient(self, x):
        # Evaluate the forward function once and reuse it.
        s = self.__call__(x)
        return s * (1 - s)


class Softmax():
    """Softmax over the last axis."""

    def __call__(self, x):
        # Subtracting the row-wise maximum keeps exp() from overflowing
        # without changing the result.
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)

    def gradient(self, x):
        # Element-wise p * (1 - p), i.e. only the diagonal terms of the
        # softmax Jacobian.
        p = self.__call__(x)
        return p * (1 - p)


class TanH():
    """Hyperbolic tangent, applied element-wise."""

    def __call__(self, x):
        # np.tanh is accurate near zero and does not overflow, unlike the
        # hand-written 2 / (1 + exp(-2x)) - 1.
        return np.tanh(x)

    def gradient(self, x):
        return 1 - np.power(self.__call__(x), 2)


class ReLU():
    """Rectified linear unit: x for x >= 0, otherwise 0."""

    def __call__(self, x):
        return np.where(x >= 0, x, 0)

    def gradient(self, x):
        return np.where(x >= 0, 1, 0)


class LeakyReLU():
    """ReLU with slope `alpha` on the negative side."""

    def __init__(self, alpha=0.2):
        self.alpha = alpha

    def __call__(self, x):
        return np.where(x >= 0, x, self.alpha * x)

    def gradient(self, x):
        return np.where(x >= 0, 1, self.alpha)


class ELU():
    """Exponential linear unit: x for x >= 0, alpha * (exp(x) - 1) otherwise."""

    def __init__(self, alpha=0.1):
        self.alpha = alpha

    def __call__(self, x):
        # np.where evaluates both branches for every element, so the
        # exponent is clamped to <= 0 to keep large positive x from
        # overflowing in the branch that is discarded anyway.
        return np.where(x >= 0.0, x, self.alpha * np.expm1(np.minimum(x, 0.0)))

    def gradient(self, x):
        # For x < 0 the derivative is alpha * exp(x).
        return np.where(x >= 0.0, 1, self.alpha * np.exp(np.minimum(x, 0.0)))


class SELU():
    """Scaled exponential linear unit.

    Reference : https://arxiv.org/abs/1706.02515,
    https://github.com/bioinf-jku/SNNs/blob/master/SelfNormalizingNetworks_MLP_MNIST.ipynb
    """

    def __init__(self):
        self.alpha = 1.6732632423543772848170429916717
        self.scale = 1.0507009873554804934193349852946

    def __call__(self, x):
        # Exponent clamped to <= 0 for the same reason as in ELU.
        return self.scale * np.where(x >= 0.0, x, self.alpha * np.expm1(np.minimum(x, 0.0)))

    def gradient(self, x):
        return self.scale * np.where(x >= 0.0, 1, self.alpha * np.exp(np.minimum(x, 0.0)))


class SoftPlus():
    """SoftPlus, log(1 + exp(x)), applied element-wise."""

    def __call__(self, x):
        # log(exp(0) + exp(x)) computed without forming exp(x) directly, so
        # large x yields x instead of inf.
        return np.logaddexp(0, x)

    def gradient(self, x):
        # The derivative of SoftPlus is the logistic sigmoid.
        return Sigmoid()(x)
For dense layers a single digit specifying the number of features of the input. Must be specified if it is the first layer in the network. """ def __init__(self, n_units, input_shape=None): self.layer_input = None self.input_shape = input_shape self.n_units = n_units self.trainable = True self.W = None self.w0 = None def initialize(self, optimizer): # Initialize the weights limit = 1 / math.sqrt(self.input_shape[0]) self.W = np.random.uniform(-limit, limit, (self.input_shape[0], self.n_units)) self.w0 = np.zeros((1, self.n_units)) # Weight optimizers self.W_opt = copy.copy(optimizer) self.w0_opt = copy.copy(optimizer) def parameters(self): return np.prod(self.W.shape) + np.prod(self.w0.shape) def forward_pass(self, X, training=True): self.layer_input = X return X.dot(self.W) + self.w0 def backward_pass(self, accum_grad): # Save weights used during forwards pass W = self.W if self.trainable: # Calculate gradient w.r.t layer weights grad_w = self.layer_input.T.dot(accum_grad) grad_w0 = np.sum(accum_grad, axis=0, keepdims=True) # Update the layer weights self.W = self.W_opt.update(self.W, grad_w) self.w0 = self.w0_opt.update(self.w0, grad_w0) # Return accumulated gradient for next layer # Calculated based on the weights used during the forward pass accum_grad = accum_grad.dot(W.T) return accum_grad def output_shape(self): return (self.n_units, ) class RNN(Layer): """A Vanilla Fully-Connected Recurrent Neural Network layer. Parameters: ----------- n_units: int The number of hidden states in the layer. activation: string The name of the activation function which will be applied to the output of each state. bptt_trunc: int Decides how many time steps the gradient should be propagated backwards through states given the loss gradient for time step t. input_shape: tuple The expected input shape of the layer. For dense layers a single digit specifying the number of features of the input. Must be specified if it is the first layer in the network. 
Reference: http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano/ """ def __init__(self, n_units, activation='tanh', bptt_trunc=5, input_shape=None): self.input_shape = input_shape self.n_units = n_units self.activation = activation_functions[activation]() self.trainable = True self.bptt_trunc = bptt_trunc self.W = None # Weight of the previous state self.V = None # Weight of the output self.U = None # Weight of the input def initialize(self, optimizer): timesteps, input_dim = self.input_shape # Initialize the weights limit = 1 / math.sqrt(input_dim) self.U = np.random.uniform(-limit, limit, (self.n_units, input_dim)) limit = 1 / math.sqrt(self.n_units) self.V = np.random.uniform(-limit, limit, (input_dim, self.n_units)) self.W = np.random.uniform(-limit, limit, (self.n_units, self.n_units)) # Weight optimizers self.U_opt = copy.copy(optimizer) self.V_opt = copy.copy(optimizer) self.W_opt = copy.copy(optimizer) def parameters(self): return np.prod(self.W.shape) + np.prod(self.U.shape) + np.prod(self.V.shape) def forward_pass(self, X, training=True): self.layer_input = X batch_size, timesteps, input_dim = X.shape # Save these values for use in backprop. 
self.state_input = np.zeros((batch_size, timesteps, self.n_units)) self.states = np.zeros((batch_size, timesteps+1, self.n_units)) self.outputs = np.zeros((batch_size, timesteps, input_dim)) # Set last time step to zero for calculation of the state_input at time step zero self.states[:, -1] = np.zeros((batch_size, self.n_units)) for t in range(timesteps): # Input to state_t is the current input and output of previous states self.state_input[:, t] = X[:, t].dot(self.U.T) + self.states[:, t-1].dot(self.W.T) self.states[:, t] = self.activation(self.state_input[:, t]) self.outputs[:, t] = self.states[:, t].dot(self.V.T) return self.outputs def backward_pass(self, accum_grad): _, timesteps, _ = accum_grad.shape # Variables where we save the accumulated gradient w.r.t each parameter grad_U = np.zeros_like(self.U) grad_V = np.zeros_like(self.V) grad_W = np.zeros_like(self.W) # The gradient w.r.t the layer input. # Will be passed on to the previous layer in the network accum_grad_next = np.zeros_like(accum_grad) # Back Propagation Through Time for t in reversed(range(timesteps)): # Update gradient w.r.t V at time step t grad_V += accum_grad[:, t].T.dot(self.states[:, t]) # Calculate the gradient w.r.t the state input grad_wrt_state = accum_grad[:, t].dot(self.V) * self.activation.gradient(self.state_input[:, t]) # Gradient w.r.t the layer input accum_grad_next[:, t] = grad_wrt_state.dot(self.U) # Update gradient w.r.t W and U by backprop. 
class Conv2D(Layer):
    """A 2D convolution layer evaluated as one matrix product over image columns.

    Parameters:
    -----------
    n_filters: int
        Number of filters convolved over the input; also the number of
        output channels.
    filter_shape: tuple
        (filter_height, filter_width).
    input_shape: tuple
        (channels, height, width) of a single sample. Only needs to be
        specified for the first layer in the network.
    padding: string
        'same' pads so that (for stride 1) output height/width equal the
        input height/width; 'valid' adds no padding.
    stride: int
        Step length of the filters over the input.
    """
    def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
        self.input_shape = input_shape
        self.n_filters = n_filters
        self.filter_shape = filter_shape
        self.stride = stride
        self.padding = padding
        self.trainable = True

    def initialize(self, optimizer):
        """Draw the filter weights, zero the biases and clone the optimizer."""
        kernel_h, kernel_w = self.filter_shape
        n_channels = self.input_shape[0]
        bound = 1 / math.sqrt(np.prod(self.filter_shape))
        weight_shape = (self.n_filters, n_channels, kernel_h, kernel_w)
        self.W = np.random.uniform(-bound, bound, size=weight_shape)
        self.w0 = np.zeros((self.n_filters, 1))
        # Each parameter gets its own optimizer state
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        """Return the number of trainable scalars (weights plus biases)."""
        n_weights = np.prod(self.W.shape)
        n_biases = np.prod(self.w0.shape)
        return n_weights + n_biases

    def forward_pass(self, X, training=True):
        """Convolve X (batch, channels, height, width) with the filters."""
        n_samples, _, _, _ = X.shape
        self.layer_input = X
        # Column layout of input and weights turns the convolution into a
        # single dot product; both are cached for the backward pass.
        self.X_col = image_to_column(X, self.filter_shape, stride=self.stride,
                                     output_shape=self.padding)
        self.W_col = self.W.reshape((self.n_filters, -1))
        response = self.W_col.dot(self.X_col) + self.w0
        # (n_filters, out_height, out_width, batch) -> batch first
        response = response.reshape(self.output_shape() + (n_samples,))
        return response.transpose(3, 0, 1, 2)

    def backward_pass(self, accum_grad):
        """Update the parameters (if trainable) and return the input gradient."""
        grad_col = accum_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)

        if self.trainable:
            # Weight gradient: column-shaped gradient times column-shaped input
            weight_grad = grad_col.dot(self.X_col.T).reshape(self.W.shape)
            # Bias gradient: sum over all positions and samples, as in Dense
            bias_grad = np.sum(grad_col, axis=1, keepdims=True)
            self.W = self.W_opt.update(self.W, weight_grad)
            self.w0 = self.w0_opt.update(self.w0, bias_grad)

        # Uses the weights cached in forward_pass (i.e. before the update)
        input_grad_col = self.W_col.T.dot(grad_col)
        return column_to_image(input_grad_col,
                               self.layer_input.shape,
                               self.filter_shape,
                               stride=self.stride,
                               output_shape=self.padding)

    def output_shape(self):
        """Return (n_filters, out_height, out_width) for one sample."""
        _, in_h, in_w = self.input_shape
        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
        out_h = (in_h + np.sum(pad_h) - self.filter_shape[0]) / self.stride + 1
        out_w = (in_w + np.sum(pad_w) - self.filter_shape[1]) / self.stride + 1
        return self.n_filters, int(out_h), int(out_w)
class BatchNormalization(Layer):
    """Batch normalization.

    Normalizes the input over the batch axis (axis 0) and applies a learned
    scale (gamma) and shift (beta). Running statistics are tracked with
    exponential averaging and used when not training.

    Parameters:
    -----------
    momentum: float
        Weight of the previous running statistics in the moving average.
    """
    def __init__(self, momentum=0.99):
        self.momentum = momentum
        self.eps = 0.01
        self.trainable = True
        self.running_mean = None
        self.running_var = None

    def initialize(self, optimizer):
        """Create gamma/beta with the layer's input shape and clone the optimizer."""
        self.gamma = np.ones(self.input_shape)
        self.beta = np.zeros(self.input_shape)
        self.gamma_opt = copy.copy(optimizer)
        self.beta_opt = copy.copy(optimizer)

    def parameters(self):
        """Return the number of trainable scalars (gamma plus beta)."""
        return np.prod(self.gamma.shape) + np.prod(self.beta.shape)

    def forward_pass(self, X, training=True):
        """Normalize X and return gamma * X_norm + beta."""
        # Seed the running statistics from the first batch ever seen
        if self.running_mean is None:
            self.running_mean = np.mean(X, axis=0)
            self.running_var = np.var(X, axis=0)

        use_batch_stats = training and self.trainable
        if use_batch_stats:
            mean = np.mean(X, axis=0)
            var = np.var(X, axis=0)
            keep = self.momentum
            self.running_mean = keep * self.running_mean + (1 - keep) * mean
            self.running_var = keep * self.running_var + (1 - keep) * var
        else:
            mean, var = self.running_mean, self.running_var

        # Cached for backward_pass
        self.X_centered = X - mean
        self.stddev_inv = 1 / np.sqrt(var + self.eps)

        normalized = self.X_centered * self.stddev_inv
        return self.gamma * normalized + self.beta

    def backward_pass(self, accum_grad):
        """Update gamma/beta (if trainable) and return the gradient w.r.t. the input."""
        # Keep the gamma used in the forward pass; the update below replaces it
        gamma_fwd = self.gamma

        if self.trainable:
            normalized = self.X_centered * self.stddev_inv
            gamma_grad = np.sum(accum_grad * normalized, axis=0)
            beta_grad = np.sum(accum_grad, axis=0)
            self.gamma = self.gamma_opt.update(self.gamma, gamma_grad)
            self.beta = self.beta_opt.update(self.beta, beta_grad)

        n = accum_grad.shape[0]
        grad_sum = np.sum(accum_grad, axis=0)
        centered_proj = np.sum(accum_grad * self.X_centered, axis=0)
        scale = (1 / n) * gamma_fwd * self.stddev_inv
        # Gradient of the loss w.r.t. the layer input, from forward-pass statistics
        return scale * (
            n * accum_grad
            - grad_sum
            - self.X_centered * self.stddev_inv**2 * centered_proj
        )

    def output_shape(self):
        """The layer does not change the shape of its input."""
        return self.input_shape
class PoolingLayer(Layer):
    """A parent class of MaxPooling2D and AveragePooling2D.

    Parameters:
    -----------
    pool_shape: tuple
        (pool_height, pool_width) of the pooling window.
    stride: int
        Step length of the window over the input.
    padding: int or string
        0 (default) or 'valid' for no padding, or 'same'. The value is
        forwarded as the padding mode of image_to_column / column_to_image.
    """
    def __init__(self, pool_shape=(2, 2), stride=1, padding=0):
        self.pool_shape = pool_shape
        self.stride = stride
        self.padding = padding
        self.trainable = True

    def _padding_mode(self):
        """Translate self.padding into a mode string for the im2col helpers.

        The helpers only understand 'same' and 'valid'; the numeric default 0
        (and None) means "no padding" and is mapped to 'valid'. Any other
        value is passed through unchanged.
        """
        if self.padding is None or self.padding == 0 or self.padding == "valid":
            return "valid"
        return self.padding

    def forward_pass(self, X, training=True):
        """Pool X (batch, channels, height, width) channel by channel."""
        self.layer_input = X

        batch_size, channels, height, width = X.shape
        _, out_height, out_width = self.output_shape()

        # Treat every channel of every sample as its own one-channel image
        X = X.reshape(batch_size * channels, 1, height, width)
        X_col = image_to_column(X, self.pool_shape, self.stride, self._padding_mode())

        # MaxPool or AveragePool specific method
        output = self._pool_forward(X_col)

        output = output.reshape(out_height, out_width, batch_size, channels)
        return output.transpose(2, 3, 0, 1)

    def backward_pass(self, accum_grad):
        """Route the gradient back to the input positions of each pooling window."""
        batch_size, _, _, _ = accum_grad.shape
        channels, height, width = self.input_shape
        accum_grad = accum_grad.transpose(2, 3, 0, 1).ravel()

        # MaxPool or AveragePool specific method
        accum_grad_col = self._pool_backward(accum_grad)

        # Must use the same padding mode as forward_pass so that the column
        # indices line up (the literal 0 used previously crashed here).
        accum_grad = column_to_image(accum_grad_col,
                                     (batch_size * channels, 1, height, width),
                                     self.pool_shape, self.stride,
                                     self._padding_mode())
        return accum_grad.reshape((batch_size,) + self.input_shape)

    def output_shape(self):
        """Return (channels, out_height, out_width); the window must tile the input exactly."""
        channels, height, width = self.input_shape
        out_height = (height - self.pool_shape[0]) / self.stride + 1
        out_width = (width - self.pool_shape[1]) / self.stride + 1
        assert out_height % 1 == 0
        assert out_width % 1 == 0
        return channels, int(out_height), int(out_width)


class MaxPooling2D(PoolingLayer):
    """Pooling layer that keeps the maximum of every window."""
    def _pool_forward(self, X_col):
        arg_max = np.argmax(X_col, axis=0).flatten()
        output = X_col[arg_max, range(arg_max.size)]
        # Remember which element of each window won, for the backward pass
        self.cache = arg_max
        return output

    def _pool_backward(self, accum_grad):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
        arg_max = self.cache
        # Only the winning element of each window receives gradient
        accum_grad_col[arg_max, range(accum_grad.size)] = accum_grad
        return accum_grad_col


class AveragePooling2D(PoolingLayer):
    """Pooling layer that averages every window."""
    def _pool_forward(self, X_col):
        output = np.mean(X_col, axis=0)
        return output

    def _pool_backward(self, accum_grad):
        accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
        # Every element of a window receives an equal share of the gradient
        accum_grad_col[:, range(accum_grad.size)] = 1. / accum_grad_col.shape[0] * accum_grad
        return accum_grad_col
class ConstantPadding2D(Layer):
    """Adds rows and columns of constant values to the input.
    Expects the input to be of shape (batch_size, channels, height, width)

    Parameters:
    -----------
    padding: tuple
        The amount of padding along the height and width dimension of the input.
        If (pad_h, pad_w) the same symmetric padding is applied along the height
        and width dimension. If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified
        padding is added to the beginning and end of the height and width dimension.
    padding_value: int or tuple
        The value that is added as padding.
    """
    def __init__(self, padding, padding_value=0):
        self.trainable = True
        pad_h, pad_w = padding[0], padding[1]
        h_is_pair = isinstance(pad_h, tuple)
        w_is_pair = isinstance(pad_w, tuple)
        if h_is_pair and w_is_pair:
            # Already fully specified; stored untouched
            self.padding = padding
        else:
            # Expand every scalar amount into a symmetric (before, after) pair
            self.padding = (pad_h if h_is_pair else (pad_h, pad_h),
                            pad_w if w_is_pair else (pad_w, pad_w))
        self.padding_value = padding_value

    def forward_pass(self, X, training=True):
        """Pad the two trailing (height, width) axes of X with padding_value."""
        widths = ((0, 0), (0, 0), self.padding[0], self.padding[1])
        return np.pad(X, pad_width=widths, mode="constant",
                      constant_values=self.padding_value)

    def backward_pass(self, accum_grad):
        """Crop the gradient back to the unpadded height and width."""
        top = self.padding[0][0]
        left = self.padding[1][0]
        height = self.input_shape[1]
        width = self.input_shape[2]
        return accum_grad[:, :, top:top + height, left:left + width]

    def output_shape(self):
        """Return (channels, padded_height, padded_width)."""
        channels = self.input_shape[0]
        padded_h = self.input_shape[1] + np.sum(self.padding[0])
        padded_w = self.input_shape[2] + np.sum(self.padding[1])
        return (channels, padded_h, padded_w)
class ZeroPadding2D(ConstantPadding2D):
    """Adds rows and columns of zero values to the input.
    Expects the input to be of shape (batch_size, channels, height, width)

    Parameters:
    -----------
    padding: tuple
        The amount of padding along the height and width dimension of the input.
        If (pad_h, pad_w) the same symmetric padding is applied along the height
        and width dimension. If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified
        padding is added to the beginning and end of the height and width dimension.
    """
    def __init__(self, padding):
        self.padding = padding
        # Set like every other layer does, so code that reads `trainable`
        # before NeuralNetwork.set_trainable() is called finds the attribute.
        self.trainable = True
        if isinstance(padding[0], int):
            self.padding = ((padding[0], padding[0]), padding[1])
        if isinstance(padding[1], int):
            self.padding = (self.padding[0], (padding[1], padding[1]))
        self.padding_value = 0


class Flatten(Layer):
    """ Turns a multidimensional matrix into two-dimensional """
    def __init__(self, input_shape=None):
        self.prev_shape = None
        self.trainable = True
        self.input_shape = input_shape

    def forward_pass(self, X, training=True):
        # Remember the incoming shape so the gradient can be folded back
        self.prev_shape = X.shape
        return X.reshape((X.shape[0], -1))

    def backward_pass(self, accum_grad):
        return accum_grad.reshape(self.prev_shape)

    def output_shape(self):
        return (np.prod(self.input_shape),)


class UpSampling2D(Layer):
    """ Nearest neighbor up sampling of the input. Repeats the rows and
    columns of the data by size[0] and size[1] respectively.

    Parameters:
    -----------
    size: tuple
        (size_y, size_x) - The number of times each axis will be repeated.
    """
    def __init__(self, size=(2,2), input_shape=None):
        self.prev_shape = None
        self.trainable = True
        self.size = size
        self.input_shape = input_shape

    def forward_pass(self, X, training=True):
        self.prev_shape = X.shape
        # Repeat each axis as specified by size
        X_new = X.repeat(self.size[0], axis=2).repeat(self.size[1], axis=3)
        return X_new

    def backward_pass(self, accum_grad):
        """Return the gradient w.r.t. the (smaller) layer input.

        Every input element is copied into a size[0] x size[1] window of the
        output, so its gradient is the SUM of the gradients of all its copies.
        (Keeping only one element per window discards most of the gradient.)
        """
        size_y, size_x = self.size
        batch_size, channels, up_height, up_width = accum_grad.shape
        # Split each spatial axis into (input position, copy index) and
        # add up the gradient over the copy indices.
        windows = accum_grad.reshape(batch_size, channels,
                                     up_height // size_y, size_y,
                                     up_width // size_x, size_x)
        return windows.sum(axis=(3, 5))

    def output_shape(self):
        channels, height, width = self.input_shape
        return channels, self.size[0] * height, self.size[1] * width


class Reshape(Layer):
    """ Reshapes the input tensor into specified shape

    Parameters:
    -----------
    shape: tuple
        The shape which the input shall be reshaped to (batch axis excluded).
    """
    def __init__(self, shape, input_shape=None):
        self.prev_shape = None
        self.trainable = True
        self.shape = shape
        self.input_shape = input_shape

    def forward_pass(self, X, training=True):
        self.prev_shape = X.shape
        # The batch axis is kept; only the per-sample axes are reshaped
        return X.reshape((X.shape[0], ) + self.shape)

    def backward_pass(self, accum_grad):
        return accum_grad.reshape(self.prev_shape)

    def output_shape(self):
        return self.shape


class Dropout(Layer):
    """A layer that randomly sets a fraction p of the output units of the previous layer
    to zero.

    Parameters:
    -----------
    p: float
        The probability that unit x is set to zero.
    """
    def __init__(self, p=0.2):
        self.p = p
        self._mask = None
        self.input_shape = None
        self.n_units = None
        self.pass_through = True
        self.trainable = True

    def forward_pass(self, X, training=True):
        # Outside training nothing is dropped; the output is scaled by the
        # keep probability instead.
        c = (1 - self.p)
        if training:
            self._mask = np.random.uniform(size=X.shape) > self.p
            c = self._mask
        return X * c

    def backward_pass(self, accum_grad):
        # Gradient only flows through the units kept in the last training pass
        return accum_grad * self._mask

    def output_shape(self):
        return self.input_shape


# Maps the activation names accepted by the layers to their implementing classes
activation_functions = {
    'relu': ReLU,
    'sigmoid': Sigmoid,
    'selu': SELU,
    'elu': ELU,
    'softmax': Softmax,
    'leaky_relu': LeakyReLU,
    'tanh': TanH,
    'softplus': SoftPlus
}


class Activation(Layer):
    """A layer that applies an activation operation to the input.

    Parameters:
    -----------
    name: string
        The name of the activation function that will be used
        (a key of `activation_functions`).
    """
    def __init__(self, name):
        self.activation_name = name
        self.activation_func = activation_functions[name]()
        self.trainable = True

    def layer_name(self):
        return "Activation (%s)" % (self.activation_func.__class__.__name__)

    def forward_pass(self, X, training=True):
        # Input is cached because the gradient is evaluated at it
        self.layer_input = X
        return self.activation_func(X)

    def backward_pass(self, accum_grad):
        return accum_grad * self.activation_func.gradient(self.layer_input)

    def output_shape(self):
        return self.input_shape
def determine_padding(filter_shape, output_shape="same"):
    """Return ((pad_top, pad_bottom), (pad_left, pad_right)) for a padding mode.

    Parameters:
    -----------
    filter_shape: tuple
        (filter_height, filter_width).
    output_shape: string, int or None
        "same"  - pad so that, with stride 1, the output has the input's
                  height and width.
        "valid" - no padding. The values 0 and None are accepted as
                  synonyms (the pooling layers default to padding=0).

    Raises:
    -------
    ValueError if the mode is not recognised. (Previously None was returned,
    which surfaced in callers as an unrelated unpacking TypeError.)
    """
    if output_shape is None or output_shape == 0 or output_shape == "valid":
        return (0, 0), (0, 0)

    if output_shape == "same":
        filter_height, filter_width = filter_shape
        # Derived from:
        #   output_height = (height + pad_h - filter_height) / stride + 1
        # with output_height = height and stride = 1, i.e. a total padding
        # of filter_size - 1. For an odd total the extra unit goes to the
        # end (floor before, ceil after).
        total_h = filter_height - 1
        total_w = filter_width - 1
        pad_h1 = total_h // 2
        pad_w1 = total_w // 2
        return (pad_h1, total_h - pad_h1), (pad_w1, total_w - pad_w1)

    raise ValueError(
        "Unknown padding mode %r (expected 'same' or 'valid')" % (output_shape,))


# Reference: CS231n Stanford
def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    """Build the (channel, row, column) index arrays used by im2col/col2im.

    `padding` is ((pad_top, pad_bottom), (pad_left, pad_right)). The returned
    arrays index into the PADDED image: fancy-indexing with them gathers
    every filter-sized patch as one column.
    """
    # First figure out what the size of the output should be
    batch_size, channels, height, width = images_shape
    filter_height, filter_width = filter_shape
    pad_h, pad_w = padding
    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)

    # Row offsets inside one patch (i0) and row of each patch origin (i1)
    i0 = np.repeat(np.arange(filter_height), filter_width)
    i0 = np.tile(i0, channels)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    # Column offsets inside one patch (j0) and column of each patch origin (j1)
    j0 = np.tile(np.arange(filter_width), filter_height * channels)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    # Channel index of every patch element
    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)

    return (k, i, j)


# Method which turns the image shaped input to column shape.
# Used during the forward pass.
# Reference: CS231n Stanford
def image_to_column(images, filter_shape, stride, output_shape='same'):
    """Rearrange image patches into columns.

    `images` has shape (batch, channels, height, width). The result has
    shape (filter_height * filter_width * channels, n_patches * batch).
    `output_shape` is the padding mode understood by determine_padding.
    """
    filter_height, filter_width = filter_shape

    pad_h, pad_w = determine_padding(filter_shape, output_shape)

    # Add padding to the image
    images_padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')

    # Calculate the indices where the dot products are to be applied between
    # weights and the image
    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)

    # Get content from image at those indices
    cols = images_padded[:, k, i, j]
    channels = images.shape[1]
    # Reshape content into column shape
    cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
    return cols
# Method which turns the column shaped input to image shape.
# Used during the backward pass.
# Reference: CS231n Stanford
def column_to_image(cols, images_shape, filter_shape, stride, output_shape='same'):
    """Scatter-add column-shaped values back into image shape.

    `images_shape` is (batch, channels, height, width) of the unpadded image
    and `output_shape` is the padding mode understood by determine_padding.
    Positions covered by several patches receive the sum of their values.
    """
    batch_size, channels, height, width = images_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    height_padded = height + np.sum(pad_h)
    width_padded = width + np.sum(pad_w)
    images_padded = np.zeros((batch_size, channels, height_padded, width_padded))

    # Calculate the indices where the dot products are applied between weights
    # and the image
    k, i, j = get_im2col_indices(images_shape, filter_shape, (pad_h, pad_w), stride)

    cols = cols.reshape(channels * np.prod(filter_shape), -1, batch_size)
    cols = cols.transpose(2, 0, 1)
    # Add column content to the images at the indices
    # (np.add.at accumulates correctly when indices repeat)
    np.add.at(images_padded, (slice(None), k, i, j), cols)

    # Return image without padding
    return images_padded[:, :, pad_h[0]:height+pad_h[0], pad_w[0]:width+pad_w[0]]


class Loss(object):
    """Base class of the loss functions.

    Subclasses implement `loss` (element-wise loss) and `gradient`
    (gradient of the loss w.r.t. the prediction).
    """
    def loss(self, y_true, y_pred):
        # Raise (not return) so that a subclass missing `loss` fails loudly
        # instead of handing an exception instance to np.mean().
        raise NotImplementedError()

    def gradient(self, y, y_pred):
        raise NotImplementedError()

    def acc(self, y, y_pred):
        """Accuracy is undefined for a generic loss; report 0."""
        return 0


class SquareLoss(Loss):
    """Half squared error: 0.5 * (y - y_pred)^2."""
    def __init__(self): pass

    def loss(self, y, y_pred):
        return 0.5 * np.power((y - y_pred), 2)

    def gradient(self, y, y_pred):
        return -(y - y_pred)


class CrossEntropy(Loss):
    """Element-wise binary cross entropy between targets y and probabilities p."""
    def __init__(self): pass

    def loss(self, y, p):
        # Avoid division by zero
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return - y * np.log(p) - (1 - y) * np.log(1 - p)

    def acc(self, y, p):
        # Compares the arg-max class per row of targets and predictions
        return accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1))

    def gradient(self, y, p):
        # Avoid division by zero
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return - (y / p) + (1 - y) / (1 - p)
class NeuralNetwork():
    """Neural Network. Deep Learning base model.

    Parameters:
    -----------
    optimizer: class
        The weight optimizer handed to every layer that has weights to
        initialize; used to minimize the loss.
    loss: class
        Loss function class (instantiated here) used to measure the model's
        performance. SquareLoss or CrossEntropy.
    validation_data: tuple
        A tuple containing validation data and labels (X, y)
    """
    def __init__(self, optimizer, loss, validation_data=None):
        self.optimizer = optimizer
        self.layers = []
        self.errors = {"training": [], "validation": []}
        self.loss_function = loss()
        self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)

        self.val_set = None
        if validation_data:
            val_X, val_y = validation_data
            self.val_set = {"X": val_X, "y": val_y}

    def set_trainable(self, trainable):
        """ Freeze (False) or unfreeze (True) the weights of all layers. """
        for member in self.layers:
            member.trainable = trainable

    def add(self, layer):
        """ Append a layer to the network, wiring up its shape and weights. """
        # Every layer but the first takes its input shape from the layer
        # that was added before it
        if self.layers:
            previous = self.layers[-1]
            layer.set_input_shape(shape=previous.output_shape())

        # Only layers with weights expose `initialize`
        if hasattr(layer, 'initialize'):
            layer.initialize(optimizer=self.optimizer)

        self.layers.append(layer)

    def test_on_batch(self, X, y):
        """ Return (mean loss, accuracy) on one batch without updating weights. """
        prediction = self._forward_pass(X, training=False)
        mean_loss = np.mean(self.loss_function.loss(y, prediction))
        accuracy = self.loss_function.acc(y, prediction)
        return mean_loss, accuracy

    def train_on_batch(self, X, y):
        """ Do one gradient update on a batch and return (mean loss, accuracy). """
        prediction = self._forward_pass(X)
        mean_loss = np.mean(self.loss_function.loss(y, prediction))
        accuracy = self.loss_function.acc(y, prediction)
        # Gradient of the loss w.r.t. the prediction, pushed back through
        # the layers (which update their weights on the way)
        self._backward_pass(loss_grad=self.loss_function.gradient(y, prediction))
        return mean_loss, accuracy

    def fit(self, X, y, n_epochs, batch_size):
        """ Train for n_epochs and return (training errors, validation errors). """
        for _ in self.progressbar(range(n_epochs)):
            epoch_losses = [
                self.train_on_batch(X_batch, y_batch)[0]
                for X_batch, y_batch in batch_iterator(X, y, batch_size=batch_size)
            ]
            self.errors["training"].append(np.mean(epoch_losses))

            if self.val_set is not None:
                val_loss, _ = self.test_on_batch(self.val_set["X"], self.val_set["y"])
                self.errors["validation"].append(val_loss)

        return self.errors["training"], self.errors["validation"]

    def _forward_pass(self, X, training=True):
        """ Feed X through every layer in order and return the final output. """
        signal = X
        for member in self.layers:
            signal = member.forward_pass(signal, training)
        return signal

    def _backward_pass(self, loss_grad):
        """ Feed the gradient through the layers in reverse; layers update their weights. """
        gradient = loss_grad
        for member in reversed(self.layers):
            gradient = member.backward_pass(gradient)

    def summary(self, name="Model Summary"):
        """ Print the model name, input shape and a per-layer table. """
        print (AsciiTable([[name]]).table)
        # The network's input shape is the first layer's input shape
        print ("Input Shape: %s" % str(self.layers[0].input_shape))

        rows = [["Layer Type", "Parameters", "Output Shape"]]
        total = 0
        for member in self.layers:
            n_params = member.parameters()
            rows.append([member.layer_name(), str(n_params), str(member.output_shape())])
            total += n_params

        print (AsciiTable(rows).table)
        print ("Total Parameters: %d\n" % total)

    def predict(self, X):
        """ Return the network output for X in inference mode. """
        return self._forward_pass(X, training=False)
# Optimizers for models that use gradient based methods for finding the
# weights that minimizes the loss.
# A great resource for understanding these methods:
# http://sebastianruder.com/optimizing-gradient-descent/index.html

class StochasticGradientDescent():
    """Gradient descent with an exponentially averaged (momentum) update."""
    def __init__(self, learning_rate=0.01, momentum=0):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.w_updt = None

    def update(self, w, grad_wrt_w):
        """Return the updated weights given the gradient at w."""
        # If not initialized
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))
        # Use momentum if set
        self.w_updt = self.momentum * self.w_updt + (1 - self.momentum) * grad_wrt_w
        # Move against the gradient to minimize loss
        return w - self.learning_rate * self.w_updt


class NesterovAcceleratedGradient():
    """Momentum optimizer that evaluates the gradient at the look-ahead position."""
    def __init__(self, learning_rate=0.001, momentum=0.4):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.w_updt = np.array([])

    def update(self, w, grad_func):
        """Return the updated weights.

        Unlike the other optimizers, the second argument is a function that
        returns the gradient at a given weight vector. The gradient is
        clipped to [-1, 1].
        """
        # Initialize the running update BEFORE it is used for the look-ahead.
        # With the empty placeholder, `w - momentum * w_updt` either fails to
        # broadcast or collapses to an empty array.
        if np.shape(self.w_updt) != np.shape(w):
            self.w_updt = np.zeros(np.shape(w))
        # Calculate the gradient of the loss a bit further down the slope from w
        approx_future_grad = np.clip(grad_func(w - self.momentum * self.w_updt), -1, 1)

        self.w_updt = self.momentum * self.w_updt + self.learning_rate * approx_future_grad
        # Move against the gradient to minimize loss
        return w - self.w_updt


class Adagrad():
    """Per-weight learning rates scaled by the accumulated squared gradients."""
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate
        self.G = None # Sum of squares of the gradients
        self.eps = 1e-8

    def update(self, w, grad_wrt_w):
        """Return the updated weights given the gradient at w."""
        # If not initialized
        if self.G is None:
            self.G = np.zeros(np.shape(w))
        # Add the square of the gradient of the loss function at w
        self.G += np.power(grad_wrt_w, 2)
        # Adaptive gradient with higher learning rate for sparse data
        return w - self.learning_rate * grad_wrt_w / np.sqrt(self.G + self.eps)
class Adadelta():
    """Adadelta: learning-rate free updates from running RMS values."""
    def __init__(self, rho=0.95, eps=1e-6):
        self.E_w_updt = None # Running average of squared parameter updates
        self.E_grad = None   # Running average of the squared gradient of w
        self.w_updt = None   # Parameter update
        self.eps = eps
        self.rho = rho

    def update(self, w, grad_wrt_w):
        """Return the updated weights given the gradient at w."""
        # If not initialized
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))
            self.E_w_updt = np.zeros(np.shape(w))
            self.E_grad = np.zeros(np.shape(grad_wrt_w))

        # Update average of gradients at w
        self.E_grad = self.rho * self.E_grad + (1 - self.rho) * np.power(grad_wrt_w, 2)

        RMS_delta_w = np.sqrt(self.E_w_updt + self.eps)
        RMS_grad = np.sqrt(self.E_grad + self.eps)

        # Adaptive learning rate
        adaptive_lr = RMS_delta_w / RMS_grad

        # Calculate the update
        self.w_updt = adaptive_lr * grad_wrt_w

        # Update the running average of w updates
        self.E_w_updt = self.rho * self.E_w_updt + (1 - self.rho) * np.power(self.w_updt, 2)

        return w - self.w_updt


class RMSprop():
    """RMSprop: gradient divided by a running RMS of recent gradients."""
    def __init__(self, learning_rate=0.01, rho=0.9):
        self.learning_rate = learning_rate
        self.Eg = None # Running average of the square gradients at w
        self.eps = 1e-8
        self.rho = rho

    def update(self, w, grad_wrt_w):
        """Return the updated weights given the gradient at w."""
        # If not initialized
        if self.Eg is None:
            self.Eg = np.zeros(np.shape(grad_wrt_w))

        self.Eg = self.rho * self.Eg + (1 - self.rho) * np.power(grad_wrt_w, 2)

        # Divide the learning rate for a weight by a running average of the
        # magnitudes of recent gradients for that weight
        return w - self.learning_rate * grad_wrt_w / np.sqrt(self.Eg + self.eps)


class Adam():
    """Adam: bias-corrected running averages of the gradient and its square.

    Parameters:
    -----------
    learning_rate: float
        Step size.
    b1: float
        Decay rate of the first-moment (mean) estimate.
    b2: float
        Decay rate of the second-moment (uncentered variance) estimate.
    """
    def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
        self.learning_rate = learning_rate
        self.eps = 1e-8
        self.m = None
        self.v = None
        self.t = 0  # Number of updates performed so far
        # Decay rates
        self.b1 = b1
        self.b2 = b2

    def update(self, w, grad_wrt_w):
        """Return the updated weights given the gradient at w."""
        # If not initialized
        if self.m is None:
            self.m = np.zeros(np.shape(grad_wrt_w))
            self.v = np.zeros(np.shape(grad_wrt_w))
            self.t = 0

        self.t += 1
        self.m = self.b1 * self.m + (1 - self.b1) * grad_wrt_w
        self.v = self.b2 * self.v + (1 - self.b2) * np.power(grad_wrt_w, 2)

        # Bias correction depends on the step count: the zero-initialized
        # averages are biased by a factor (1 - b**t), not by a constant (1 - b).
        m_hat = self.m / (1 - self.b1 ** self.t)
        v_hat = self.v / (1 - self.b2 ** self.t)

        self.w_updt = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)

        return w - self.w_updt
def main():
    """Train Adaboost to separate the digits 1 and 8 and plot the predictions."""
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target

    digit1 = 1
    digit2 = 8
    # Indices of all samples showing either of the two digits
    rows_digit1 = np.where(y == digit1)[0]
    rows_digit2 = np.where(y == digit2)[0]
    idx = np.append(rows_digit1, rows_digit2)

    y = digits.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = digits.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    model = Adaboost(n_clf=5)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    plotter = Plot()
    plotter.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    """Run Apriori on a small demo transaction set and print itemsets and rules."""
    # Demo transaction set
    # Example 2: https://en.wikipedia.org/wiki/Apriori_algorithm
    # The transactions have different lengths, so the array must be created
    # with dtype=object (a 1-D array of lists). Without it, NumPy >= 1.24
    # raises ValueError for the ragged input; older versions produced the
    # same object array implicitly.
    transactions = np.array([[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4], [2, 3], [3, 4], [2, 4]],
                            dtype=object)
    print ("+-------------+")
    print ("| Apriori |")
    print ("+-------------+")
    min_sup = 0.25
    min_conf = 0.8
    print ("Minimum Support: %.2f" % (min_sup))
    print ("Minimum Confidence: %s" % (min_conf))
    print ("Transactions:")
    for transaction in transactions:
        print ("\t%s" % transaction)

    apriori = Apriori(min_sup=min_sup, min_conf=min_conf)

    # Get and print the frequent itemsets
    frequent_itemsets = apriori.find_frequent_itemsets(transactions)
    print ("Frequent Itemsets:\n\t%s" % frequent_itemsets)

    # Get and print the rules
    rules = apriori.generate_rules(transactions)
    print ("Rules:")
    for rule in rules:
        print ("\t%s -> %s (support: %.2f, confidence: %s)" % (rule.antecedent, rule.concequent, rule.support, rule.confidence,))
def main():
    """Fit a polynomial Bayesian regression to the temperature data and plot it."""
    # Load temperature data
    frame = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    X = np.atleast_2d(frame["time"].values).T    # fraction of the year [0, 1]
    y = np.atleast_2d(frame["temp"].values).T

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    _, n_features = np.shape(X)

    # Prior parameters
    # - Weights are assumed distr. according to a Normal distribution
    # - The variance of the weights are assumed distributed according to
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    normal_prior = {
        "mu0": np.array([0] * n_features),
        "omega0": np.diag([.0001] * n_features),
    }
    # Scaled inverse chi-squared
    chi2_prior = {"nu0": 1, "sigma_sq0": 100}

    # The credible interval
    cred_int = 10

    clf = BayesianRegression(n_draws=2000,
        poly_degree=4,
        mu0=normal_prior["mu0"],
        omega0=normal_prior["omega0"],
        nu0=chi2_prior["nu0"],
        sigma_sq0=chi2_prior["sigma_sq0"],
        cred_int=cred_int)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Prediction line with the bounds of the credible interval
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print ("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results; the time fraction is scaled to days (366 in 2016)
    days = 366 * X
    plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(days, y_pred_, color="black", linewidth=2, label="Prediction")
    plt.plot(days, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int))
    plt.plot(days, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')
    plt.legend(loc='lower right')

    plt.show()
def main():
    """Train a small convolutional network on the 8x8 digits data set."""

    #----------
    # Conv Net
    #----------

    optimizer = Adam()

    digits = datasets.load_digits()
    X = digits.data

    # Convert to one-hot encoding
    y = to_categorical(digits.target.astype("int"))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # Reshape X to (n_samples, channels, height, width)
    image_shape = (-1, 1, 8, 8)
    X_train = X_train.reshape(image_shape)
    X_test = X_test.reshape(image_shape)

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    # First convolutional stage
    clf.add(Conv2D(n_filters=16, filter_shape=(3,3), stride=1, input_shape=(1,8,8), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    # Second convolutional stage
    clf.add(Conv2D(n_filters=32, filter_shape=(3,3), stride=1, padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    # Fully connected head
    clf.add(Flatten())
    clf.add(Dense(256))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.4))
    clf.add(BatchNormalization())
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print ()
    clf.summary(name="ConvNet")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    epochs = range(len(train_err))
    training, = plt.plot(epochs, train_err, label="Training Error")
    validation, = plt.plot(epochs, val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    _, accuracy = clf.test_on_batch(X_test, y_test)
    print ("Accuracy:", accuracy)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    # Back to one flat feature vector per sample for plotting
    X_test = X_test.reshape(-1, 8*8)
    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=range(10))
y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=range(10)) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/dbscan.py ================================================ import sys import os import math import random from sklearn import datasets import numpy as np # Import helper functions from mlfromscratch.utils import Plot from mlfromscratch.unsupervised_learning import DBSCAN def main(): # Load the dataset X, y = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False) # Cluster the data using DBSCAN clf = DBSCAN(eps=0.17, min_samples=5) y_pred = clf.predict(X) # Project the data onto the 2 primary principal components p = Plot() p.plot_in_2d(X, y_pred, title="DBSCAN") p.plot_in_2d(X, y, title="Actual Clustering") if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/decision_tree_classifier.py ================================================ from __future__ import division, print_function import numpy as np from sklearn import datasets import matplotlib.pyplot as plt import sys import os # Import helper functions from mlfromscratch.utils import train_test_split, standardize, accuracy_score from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot from mlfromscratch.supervised_learning import ClassificationTree def main(): print ("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Decision Tree", accuracy=accuracy, legend_labels=data.target_names) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/decision_tree_regressor.py 
================================================ from __future__ import division, print_function import numpy as np import matplotlib.pyplot as plt import pandas as pd from mlfromscratch.utils import train_test_split, standardize, accuracy_score from mlfromscratch.utils import mean_squared_error, calculate_variance, Plot from mlfromscratch.supervised_learning import RegressionTree def main(): print ("-- Regression Tree --") # Load temperature data data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = np.atleast_2d(data["temp"].values).T X = standardize(time) # Time. Fraction of the year [0, 1] y = temp[:, 0] # Temperature. Reduce to one-dim X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) model = RegressionTree() model.fit(X_train, y_train) y_pred = model.predict(X_test) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10) plt.suptitle("Regression Tree") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') plt.show() if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/deep_q_network.py ================================================ from __future__ import print_function import numpy as np from mlfromscratch.utils import to_categorical from mlfromscratch.deep_learning.optimizers import Adam from mlfromscratch.deep_learning.loss_functions import SquareLoss from mlfromscratch.deep_learning.layers import Dense, Dropout, Flatten, Activation, Reshape, BatchNormalization 
from mlfromscratch.deep_learning import NeuralNetwork from mlfromscratch.reinforcement_learning import DeepQNetwork def main(): dqn = DeepQNetwork(env_name='CartPole-v1', epsilon=0.9, gamma=0.8, decay_rate=0.005, min_epsilon=0.1) # Model builder def model(n_inputs, n_outputs): clf = NeuralNetwork(optimizer=Adam(), loss=SquareLoss) clf.add(Dense(64, input_shape=(n_inputs,))) clf.add(Activation('relu')) clf.add(Dense(n_outputs)) return clf dqn.set_model(model) print () dqn.model.summary(name="Deep Q-Network") dqn.train(n_epochs=500) dqn.play(n_epochs=100) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/demo.py ================================================ from __future__ import print_function from sklearn import datasets import numpy as np import math import matplotlib.pyplot as plt from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score from mlfromscratch.deep_learning.optimizers import Adam from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.deep_learning.activation_functions import Softmax from mlfromscratch.utils.kernels import * from mlfromscratch.supervised_learning import * from mlfromscratch.deep_learning import * from mlfromscratch.unsupervised_learning import PCA from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation print ("+-------------------------------------------+") print ("| |") print ("| Machine Learning From Scratch |") print ("| |") print ("+-------------------------------------------+") # ........... # LOAD DATA # ........... 
# Keep only the samples of two digit classes so the task is binary.
data = datasets.load_digits()
digit1 = 1
digit2 = 8
idx = np.append(np.where(data.target == digit1)[0], np.where(data.target == digit2)[0])
y = data.target[idx]
# Change labels to {0, 1}
# (digit1 is relabelled first; with digit1 == 1 no sample carries the value 1
# any more when digit2 is mapped onto 1)
y[y == digit1] = 0
y[y == digit2] = 1
X = data.data[idx]
X = normalize(X)

print ("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))

# ..........................
# DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5) # Reduce to 5 dimensions

# n_features is used below as the input size of the multilayer perceptron
n_samples, n_features = np.shape(X)

# ..........................
# TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
# (2*y - 1 maps 0 -> -1 and 1 -> 1; used for Adaboost and the SVM below)
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
# SETUP
# .......
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
# Multilayer perceptron: two hidden ReLU layers of 64 units and a 2-unit
# softmax output, one unit per class
mlp = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
mlp.add(Dense(input_shape=(n_features,), n_units=64))
mlp.add(Activation('relu'))
mlp.add(Dense(n_units=64))
mlp.add(Activation('relu'))
mlp.add(Dense(n_units=2))
mlp.add(Activation('softmax'))
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5)

# ........
# TRAIN
# ........
# Adaboost and the SVM are fitted on the {-1, 1} labels, the multilayer
# perceptron and the perceptron on to_categorical (one-hot) labels, and all
# other models on the {0, 1} labels. KNN has no fit step; it receives the
# training set directly in predict() below.
print ("Training:")
print ("- Adaboost")
adaboost.fit(X_train, rescaled_y_train)
print ("- Decision Tree")
decision_tree.fit(X_train, y_train)
print ("- Gradient Boosting")
gbc.fit(X_train, y_train)
print ("- LDA")
lda.fit(X_train, y_train)
print ("- Logistic Regression")
logistic_regression.fit(X_train, y_train)
print ("- Multilayer Perceptron")
mlp.fit(X_train, to_categorical(y_train), n_epochs=300, batch_size=50)
print ("- Naive Bayes")
naive_bayes.fit(X_train, y_train)
print ("- Perceptron")
perceptron.fit(X_train, to_categorical(y_train))
print ("- Random Forest")
random_forest.fit(X_train, y_train)
print ("- Support Vector Machine")
support_vector_machine.fit(X_train, rescaled_y_train)
print ("- XGBoost")
xgboost.fit(X_train, y_train)

# .........
# PREDICT
# .........
# Predictions are collected per model name; the names double as the labels
# printed in the accuracy report.
y_pred = {}
y_pred["Adaboost"] = adaboost.predict(X_test)
y_pred["Gradient Boosting"] = gbc.predict(X_test)
y_pred["Naive Bayes"] = naive_bayes.predict(X_test)
y_pred["K Nearest Neighbors"] = knn.predict(X_test, X_train, y_train)
y_pred["Logistic Regression"] = logistic_regression.predict(X_test)
y_pred["LDA"] = lda.predict(X_test)
# The two network models output one score per class; argmax turns that into
# a class index
y_pred["Multilayer Perceptron"] = np.argmax(mlp.predict(X_test), axis=1)
y_pred["Perceptron"] = np.argmax(perceptron.predict(X_test), axis=1)
y_pred["Decision Tree"] = decision_tree.predict(X_test)
y_pred["Random Forest"] = random_forest.predict(X_test)
y_pred["Support Vector Machine"] = support_vector_machine.predict(X_test)
y_pred["XGBoost"] = xgboost.predict(X_test)

# ..........
# ACCURACY
# ..........
print ("Accuracy:")
for clf in y_pred:
    # Rescaled {-1 1}
    # (these two models were trained on the rescaled labels, so they are
    # scored against the rescaled test labels)
    if clf == "Adaboost" or clf == "Support Vector Machine":
        print ("\t%-23s: %.5f" %(clf, accuracy_score(rescaled_y_test, y_pred[clf])))
    # Categorical
    else:
        print ("\t%-23s: %.5f" %(clf, accuracy_score(y_test, y_pred[clf])))

# .......
# PLOT
# .......
plt.scatter(X_test[:,0], X_test[:,1], c=y_test) plt.ylabel("Principal Component 2") plt.xlabel("Principal Component 1") plt.title("The Digit Dataset (digits %s and %s)" % (digit1, digit2)) plt.show() ================================================ FILE: mlfromscratch/examples/elastic_net.py ================================================ from __future__ import print_function import matplotlib.pyplot as plt import numpy as np import pandas as pd # Import helper functions from mlfromscratch.supervised_learning import ElasticNet from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error from mlfromscratch.utils import train_test_split, polynomial_features, Plot def main(): # Load temperature data data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = data["temp"].values X = time # fraction of the year [0, 1] y = temp X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) poly_degree = 13 model = ElasticNet(degree=15, reg_factor=0.01, l1_ratio=0.7, learning_rate=0.001, n_iterations=4000) model.fit(X_train, y_train) # Training error plot n = len(model.training_errors) training, = plt.plot(range(n), model.training_errors, label="Training Error") plt.legend(handles=[training]) plt.title("Error Plot") plt.ylabel('Mean Squared Error') plt.xlabel('Iterations') plt.show() y_pred = model.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean squared error: %s (given by reg. 
factor: %s)" % (mse, 0.05)) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") plt.suptitle("Elastic Net") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') plt.show() if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/fp_growth.py ================================================ import numpy as np from mlfromscratch.unsupervised_learning import FPGrowth def main(): # Demo transaction set # Example: # https://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Frequent_Pattern_Mining/The_FP-Growth_Algorithm transactions = np.array([ ["A", "B", "D", "E"], ["B", "C", "E"], ["A", "B", "D", "E"], ["A", "B", "C", "E"], ["A", "B", "C", "D", "E"], ["B", "C", "D"] ]) print ("") print ("+---------------+") print ("| FP-Growth |") print ("+---------------+") min_sup = 3 print ("Minimum Support: %s" % min_sup) print ("") print ("Transactions:") for transaction in transactions: print ("\t%s" % transaction) fp_growth = FPGrowth(min_sup=min_sup) print ("") # Get and print the frequent itemsets frequent_itemsets = fp_growth.find_frequent_itemsets( transactions, show_tree=True) print ("") print ("Frequent itemsets:") for itemset in frequent_itemsets: print ("\t%s" % itemset) print ("") if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/gaussian_mixture_model.py ================================================ from __future__ import division, print_function import sys import os import math import random from sklearn import datasets import numpy as np from mlfromscratch.unsupervised_learning import 
GaussianMixtureModel from mlfromscratch.utils import Plot def main(): # Load the dataset X, y = datasets.make_blobs() # Cluster the data clf = GaussianMixtureModel(k=3) y_pred = clf.predict(X) p = Plot() p.plot_in_2d(X, y_pred, title="GMM Clustering") p.plot_in_2d(X, y, title="Actual Clustering") if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/genetic_algorithm.py ================================================ from mlfromscratch.unsupervised_learning import GeneticAlgorithm def main(): target_string = "Genetic Algorithm" population_size = 100 mutation_rate = 0.05 genetic_algorithm = GeneticAlgorithm(target_string, population_size, mutation_rate) print ("") print ("+--------+") print ("| GA |") print ("+--------+") print ("Description: Implementation of a Genetic Algorithm which aims to produce") print ("the user specified target string. This implementation calculates each") print ("candidate's fitness based on the alphabetical distance between the candidate") print ("and the target. A candidate is selected as a parent with probabilities proportional") print ("to the candidate's fitness. Reproduction is implemented as a single-point") print ("crossover between pairs of parents. 
Mutation is done by randomly assigning") print ("new characters with uniform probability.") print ("") print ("Parameters") print ("----------") print ("Target String: '%s'" % target_string) print ("Population Size: %d" % population_size) print ("Mutation Rate: %s" % mutation_rate) print ("") genetic_algorithm.run(iterations=1000) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/gradient_boosting_classifier.py ================================================ from __future__ import division, print_function import numpy as np from sklearn import datasets import matplotlib.pyplot as plt # Import helper functions from mlfromscratch.utils import train_test_split, accuracy_score from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils import Plot from mlfromscratch.supervised_learning import GradientBoostingClassifier def main(): print ("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/gradient_boosting_regressor.py ================================================ from __future__ import division, print_function import numpy as np import pandas as pd import matplotlib.pyplot as plt import progressbar from mlfromscratch.utils import train_test_split, standardize, to_categorical from mlfromscratch.utils import mean_squared_error, accuracy_score, Plot from mlfromscratch.utils.loss_functions import SquareLoss from mlfromscratch.utils.misc import bar_widgets from 
mlfromscratch.supervised_learning import GradientBoostingRegressor def main(): print ("-- Gradient Boosting Regression --") # Load temperature data data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = np.atleast_2d(data["temp"].values).T X = time.reshape((-1, 1)) # Time. Fraction of the year [0, 1] X = np.insert(X, 0, values=1, axis=1) # Insert bias term y = temp[:, 0] # Temperature. Reduce to one-dim X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) model = GradientBoostingRegressor() model.fit(X_train, y_train) y_pred = model.predict(X_test) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10) m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10) plt.suptitle("Regression Tree") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') plt.show() if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/k_means.py ================================================ from __future__ import division, print_function from sklearn import datasets import numpy as np from mlfromscratch.unsupervised_learning import KMeans from mlfromscratch.utils import Plot def main(): # Load the dataset X, y = datasets.make_blobs() # Cluster the data using K-Means clf = KMeans(k=3) y_pred = clf.predict(X) # Project the data onto the 2 primary principal components p = Plot() p.plot_in_2d(X, y_pred, title="K-Means Clustering") p.plot_in_2d(X, y, title="Actual Clustering") if __name__ == "__main__": main() ================================================ FILE: 
mlfromscratch/examples/k_nearest_neighbors.py ================================================ from __future__ import print_function import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from mlfromscratch.utils import train_test_split, normalize, accuracy_score from mlfromscratch.utils import euclidean_distance, Plot from mlfromscratch.supervised_learning import KNN def main(): data = datasets.load_iris() X = normalize(data.data) y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) clf = KNN(k=5) y_pred = clf.predict(X_test, X_train, y_train) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) # Reduce dimensions to 2d using pca and plot the results Plot().plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/lasso_regression.py ================================================ from __future__ import print_function import matplotlib.pyplot as plt import numpy as np import pandas as pd # Import helper functions from mlfromscratch.supervised_learning import LassoRegression from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error from mlfromscratch.utils import train_test_split, polynomial_features, Plot def main(): # Load temperature data data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = data["temp"].values X = time # fraction of the year [0, 1] y = temp X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) poly_degree = 13 model = LassoRegression(degree=15, reg_factor=0.05, learning_rate=0.001, n_iterations=4000) model.fit(X_train, y_train) # Training error plot n = len(model.training_errors) training, = plt.plot(range(n), model.training_errors, label="Training Error") 
plt.legend(handles=[training]) plt.title("Error Plot") plt.ylabel('Mean Squared Error') plt.xlabel('Iterations') plt.show() y_pred = model.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, 0.05)) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") plt.suptitle("Lasso Regression") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') plt.show() if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/linear_discriminant_analysis.py ================================================ from __future__ import print_function from sklearn import datasets import matplotlib.pyplot as plt import numpy as np from mlfromscratch.supervised_learning import LDA from mlfromscratch.utils import calculate_covariance_matrix, accuracy_score from mlfromscratch.utils import normalize, standardize, train_test_split, Plot from mlfromscratch.unsupervised_learning import PCA def main(): # Load the dataset data = datasets.load_iris() X = data.data y = data.target # Three -> two classes X = X[y != 2] y = y[y != 2] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Fit and predict using LDA lda = LDA() lda.fit(X_train, y_train) y_pred = lda.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/linear_regression.py ================================================ import numpy as np 
import pandas as pd import matplotlib.pyplot as plt from sklearn.datasets import make_regression from mlfromscratch.utils import train_test_split, polynomial_features from mlfromscratch.utils import mean_squared_error, Plot from mlfromscratch.supervised_learning import LinearRegression def main(): X, y = make_regression(n_samples=100, n_features=1, noise=20) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) n_samples, n_features = np.shape(X) model = LinearRegression(n_iterations=100) model.fit(X_train, y_train) # Training error plot n = len(model.training_errors) training, = plt.plot(range(n), model.training_errors, label="Training Error") plt.legend(handles=[training]) plt.title("Error Plot") plt.ylabel('Mean Squared Error') plt.xlabel('Iterations') plt.show() y_pred = model.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean squared error: %s" % (mse)) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction") plt.suptitle("Linear Regression") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') plt.show() if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/logistic_regression.py ================================================ from __future__ import print_function from sklearn import datasets import numpy as np import matplotlib.pyplot as plt # Import helper functions from mlfromscratch.utils import make_diagonal, normalize, train_test_split, accuracy_score from mlfromscratch.deep_learning.activation_functions import Sigmoid from mlfromscratch.utils import Plot from mlfromscratch.supervised_learning import 
LogisticRegression def main(): # Load dataset data = datasets.load_iris() X = normalize(data.data[data.target != 0]) y = data.target[data.target != 0] y[y == 1] = 0 y[y == 2] = 1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1) clf = LogisticRegression(gradient_descent=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) # Reduce dimension to two using PCA and plot the results Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/multi_class_lda.py ================================================ from __future__ import print_function from sklearn import datasets import numpy as np from mlfromscratch.supervised_learning import MultiClassLDA from mlfromscratch.utils import normalize def main(): # Load the dataset data = datasets.load_iris() X = normalize(data.data) y = data.target # Project the data onto the 2 primary components multi_class_lda = MultiClassLDA() multi_class_lda.plot_in_2d(X, y, title="LDA") if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/multilayer_perceptron.py ================================================ from __future__ import print_function from sklearn import datasets import matplotlib.pyplot as plt import numpy as np # Import helper functions from mlfromscratch.deep_learning import NeuralNetwork from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot from mlfromscratch.utils import get_random_subsets, shuffle_data, accuracy_score from mlfromscratch.deep_learning.optimizers import StochasticGradientDescent, Adam, RMSprop, Adagrad, Adadelta from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.utils.misc import bar_widgets from mlfromscratch.deep_learning.layers import 
Dense, Dropout, Activation def main(): optimizer = Adam() #----- # MLP #----- data = datasets.load_digits() X = data.data y = data.target # Convert to one-hot encoding y = to_categorical(y.astype("int")) n_samples, n_features = X.shape n_hidden = 512 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy, validation_data=(X_test, y_test)) clf.add(Dense(n_hidden, input_shape=(n_features,))) clf.add(Activation('leaky_relu')) clf.add(Dense(n_hidden)) clf.add(Activation('leaky_relu')) clf.add(Dropout(0.25)) clf.add(Dense(n_hidden)) clf.add(Activation('leaky_relu')) clf.add(Dropout(0.25)) clf.add(Dense(n_hidden)) clf.add(Activation('leaky_relu')) clf.add(Dropout(0.25)) clf.add(Dense(10)) clf.add(Activation('softmax')) print () clf.summary(name="MLP") train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256) # Training and validation error plot n = len(train_err) training, = plt.plot(range(n), train_err, label="Training Error") validation, = plt.plot(range(n), val_err, label="Validation Error") plt.legend(handles=[training, validation]) plt.title("Error Plot") plt.ylabel('Error') plt.xlabel('Iterations') plt.show() _, accuracy = clf.test_on_batch(X_test, y_test) print ("Accuracy:", accuracy) # Reduce dimension to 2D using PCA and plot the results y_pred = np.argmax(clf.predict(X_test), axis=1) Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=range(10)) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/naive_bayes.py ================================================ from __future__ import division, print_function from sklearn import datasets import numpy as np from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot from mlfromscratch.supervised_learning import NaiveBayes def main(): data = datasets.load_digits() X = normalize(data.data) 
y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = NaiveBayes() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) # Reduce dimension to two using PCA and plot the results Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/neuroevolution.py ================================================ from __future__ import print_function from sklearn import datasets import matplotlib.pyplot as plt import numpy as np from mlfromscratch.supervised_learning import Neuroevolution from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot from mlfromscratch.deep_learning import NeuralNetwork from mlfromscratch.deep_learning.layers import Activation, Dense from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.deep_learning.optimizers import Adam def main(): X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4, n_clusters_per_class=1, n_informative=2) data = datasets.load_digits() X = normalize(data.data) y = data.target y = to_categorical(y.astype("int")) # Model builder def model_builder(n_inputs, n_outputs): model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy) model.add(Dense(16, input_shape=(n_inputs,))) model.add(Activation('relu')) model.add(Dense(n_outputs)) model.add(Activation('softmax')) return model # Print the model summary of a individual in the population print ("") model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary() population_size = 100 n_generations = 3000 mutation_rate = 0.01 print ("Population Size: %d" % population_size) print ("Generations: %d" % n_generations) print ("Mutation Rate: %.2f" % mutation_rate) print ("") X_train, X_test, y_train, y_test = train_test_split(X, y, 
test_size=0.4, seed=1) model = Neuroevolution(population_size=population_size, mutation_rate=mutation_rate, model_builder=model_builder) model = model.evolve(X_train, y_train, n_generations=n_generations) loss, accuracy = model.test_on_batch(X_test, y_test) # Reduce dimension to 2D using PCA and plot the results y_pred = np.argmax(model.predict(X_test), axis=1) Plot().plot_in_2d(X_test, y_pred, title="Evolutionary Evolved Neural Network", accuracy=accuracy, legend_labels=range(y.shape[1])) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/particle_swarm_optimization.py ================================================ from __future__ import print_function from sklearn import datasets import matplotlib.pyplot as plt import numpy as np from mlfromscratch.supervised_learning import ParticleSwarmOptimizedNN from mlfromscratch.utils import train_test_split, to_categorical, normalize, Plot from mlfromscratch.deep_learning import NeuralNetwork from mlfromscratch.deep_learning.layers import Activation, Dense from mlfromscratch.deep_learning.loss_functions import CrossEntropy from mlfromscratch.deep_learning.optimizers import Adam def main(): X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4, n_clusters_per_class=1, n_informative=2) data = datasets.load_iris() X = normalize(data.data) y = data.target y = to_categorical(y.astype("int")) # Model builder def model_builder(n_inputs, n_outputs): model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy) model.add(Dense(16, input_shape=(n_inputs,))) model.add(Activation('relu')) model.add(Dense(n_outputs)) model.add(Activation('softmax')) return model # Print the model summary of a individual in the population print ("") model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary() population_size = 100 n_generations = 10 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1) inertia_weight = 0.8 
cognitive_weight = 0.8 social_weight = 0.8 print ("Population Size: %d" % population_size) print ("Generations: %d" % n_generations) print ("") print ("Inertia Weight: %.2f" % inertia_weight) print ("Cognitive Weight: %.2f" % cognitive_weight) print ("Social Weight: %.2f" % social_weight) print ("") model = ParticleSwarmOptimizedNN(population_size=population_size, inertia_weight=inertia_weight, cognitive_weight=cognitive_weight, social_weight=social_weight, max_velocity=5, model_builder=model_builder) model = model.evolve(X_train, y_train, n_generations=n_generations) loss, accuracy = model.test_on_batch(X_test, y_test) print ("Accuracy: %.1f%%" % float(100*accuracy)) # Reduce dimension to 2D using PCA and plot the results y_pred = np.argmax(model.predict(X_test), axis=1) Plot().plot_in_2d(X_test, y_pred, title="Particle Swarm Optimized Neural Network", accuracy=accuracy, legend_labels=range(y.shape[1])) if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/partitioning_around_medoids.py ================================================ from sklearn import datasets import numpy as np # Import helper functions from mlfromscratch.utils import Plot from mlfromscratch.unsupervised_learning import PAM def main(): # Load the dataset X, y = datasets.make_blobs() # Cluster the data using K-Medoids clf = PAM(k=3) y_pred = clf.predict(X) # Project the data onto the 2 primary principal components p = Plot() p.plot_in_2d(X, y_pred, title="PAM Clustering") p.plot_in_2d(X, y, title="Actual Clustering") if __name__ == "__main__": main() ================================================ FILE: mlfromscratch/examples/perceptron.py ================================================ from __future__ import print_function from sklearn import datasets import numpy as np # Import helper functions from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score from 
def main():
    """Train a single-layer perceptron on the digits data and plot the result.

    The targets are one-hot encoded for training; predictions and test
    targets are converted back to class indices before scoring.
    """
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results.
    # `y` is one-hot encoded at this point, so np.unique(y) would only yield
    # the two values 0 and 1; label the legend with the actual digit classes.
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy,
                      legend_labels=data.target_names)
def main():
    """Project the digits data onto two principal components and plot it.

    Each digit class is drawn as its own scatter series, and the legend
    carries one entry per class.
    """
    # Demo of how to reduce the dimensionality of the data to two dimension
    # and plot the results.

    # Load the dataset
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Project the data onto the 2 primary principal components
    X_trans = PCA().transform(X, 2)

    x1 = X_trans[:, 0]
    x2 = X_trans[:, 1]

    labels = np.unique(y)

    cmap = plt.get_cmap('viridis')
    # Named `palette` so the module-level `matplotlib.colors` alias is not
    # shadowed inside this function.
    palette = [cmap(i) for i in np.linspace(0, 1, len(labels))]

    class_distr = []
    # Plot the different class distributions
    for color, label in zip(palette, labels):
        mask = y == label
        class_distr.append(plt.scatter(x1[mask], x2[mask], color=color))

    # Add a legend: one label per scatter handle (the original passed the
    # full per-sample target array instead of the unique classes)
    plt.legend(class_distr, labels, loc=1)

    # Axis labels
    plt.suptitle("PCA Dimensionality Reduction")
    plt.title("Digit Dataset")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
def main():
    """Train an RNN to continue multiplication series and report the results.

    Generates one-hot encoded number series, fits a single RNN layer with a
    softmax output, prints a few test predictions next to the ground truth,
    prints the accuracy and plots the training error per epoch.
    """
    optimizer = Adam()
    n_epochs = 500

    def gen_mult_ser(nums):
        """ Method which generates multiplication series """
        X = np.zeros([nums, 10, 61], dtype=float)
        y = np.zeros([nums, 10, 61], dtype=float)
        for idx in range(nums):
            first = np.random.randint(2, 7)
            series = np.linspace(first, first*10, num=10, dtype=int)
            X[idx] = to_categorical(series, n_col=61)
            y[idx] = np.roll(X[idx], -1, axis=0)
        y[:, -1, 1] = 1 # Mark endpoint as 1
        return X, y

    def gen_num_seq(nums):
        """ Method which generates sequence of numbers """
        X = np.zeros([nums, 10, 20], dtype=float)
        y = np.zeros([nums, 10, 20], dtype=float)
        for idx in range(nums):
            first = np.random.randint(0, 10)
            sequence = np.arange(first, first+10)
            X[idx] = to_categorical(sequence, n_col=20)
            y[idx] = np.roll(X[idx], -1, axis=0)
        y[:, -1, 1] = 1 # Mark endpoint as 1
        return X, y

    def as_series(values):
        """Render an integer array as a space separated string."""
        return " ".join(values.astype("str"))

    X, y = gen_mult_ser(3000)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Model definition
    clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy)
    clf.add(RNN(10, activation="tanh", bptt_trunc=5, input_shape=(10, 61)))
    clf.add(Activation('softmax'))
    clf.summary("RNN")

    # Print a problem instance and the correct solution
    sample_X = np.argmax(X_train[0], axis=1)
    sample_y = np.argmax(y_train[0], axis=1)
    print ("Number Series Problem:")
    print ("X = [" + as_series(sample_X) + "]")
    print ("y = [" + as_series(sample_y) + "]")
    print ()

    train_err, _ = clf.fit(X_train, y_train, n_epochs=n_epochs, batch_size=512)

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=2)
    y_test = np.argmax(y_test, axis=2)

    print ()
    print ("Results:")
    for shown in range(5):
        # Print a problem instance and the correct solution
        inputs = np.argmax(X_test[shown], axis=1)
        print ("X      = [" + as_series(inputs) + "]")
        print ("y_true = [" + as_series(y_test[shown]) + "]")
        print ("y_pred = [" + as_series(y_pred[shown]) + "]")
        print ()

    accuracy = np.mean(accuracy_score(y_test, y_pred))
    print ("Accuracy:", accuracy)

    training = plt.plot(range(n_epochs), train_err, label="Training Error")
    plt.title("Error Plot")
    plt.ylabel('Training Error')
    plt.xlabel('Iterations')
    plt.show()
def main():
    """Fit a polynomial ridge regression to the Linkoping temperature data.

    The regularization factor is selected by k-fold cross validation on the
    training split; the final model is then trained with the selected factor,
    scored on the test split and plotted.
    """
    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 15

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for reg_factor in np.arange(0, 0.1, 0.01):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            model = PolynomialRidgeRegression(degree=poly_degree,
                                              reg_factor=reg_factor,
                                              learning_rate=0.001,
                                              n_iterations=10000)
            model.fit(_X_train, _y_train)
            y_pred = model.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = reg_factor
            lowest_error = mse

    # Make final prediction with the factor chosen by cross validation.
    # (The original passed the loop variable `reg_factor`, i.e. always the
    # last candidate tried, which discarded the search result.)
    model = PolynomialRidgeRegression(degree=poly_degree,
                                      reg_factor=best_reg_factor,
                                      learning_rate=0.001,
                                      n_iterations=10000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, best_reg_factor))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Fit XGBoost on the iris data, print the test accuracy and plot it."""
    print ("-- XGBoost --")

    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4, seed=2)

    booster = XGBoost()
    booster.fit(X_train, y_train)
    y_pred = booster.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Plot the predicted classes of the test samples in two dimensions
    Plot().plot_in_2d(X_test, y_pred,
                      title="XGBoost",
                      accuracy=accuracy,
                      legend_labels=iris.target_names)
The probability that the agent should select a random action instead of the action that will maximize the expected utility. gamma: float Determines how much the agent should consider future rewards. decay_rate: float The rate of decay for the epsilon value after each epoch. min_epsilon: float The value which epsilon will approach as the training progresses. """ def __init__(self, env_name='CartPole-v1', epsilon=1, gamma=0.9, decay_rate=0.005, min_epsilon=0.1): self.epsilon = epsilon self.gamma = gamma self.decay_rate = decay_rate self.min_epsilon = min_epsilon self.memory_size = 300 self.memory = [] # Initialize the environment self.env = gym.make(env_name) self.n_states = self.env.observation_space.shape[0] self.n_actions = self.env.action_space.n def set_model(self, model): self.model = model(n_inputs=self.n_states, n_outputs=self.n_actions) def _select_action(self, state): if np.random.rand() < self.epsilon: # Choose action randomly action = np.random.randint(self.n_actions) else: # Take action with highest predicted utility given state action = np.argmax(self.model.predict(state), axis=1)[0] return action def _memorize(self, state, action, reward, new_state, done): self.memory.append((state, action, reward, new_state, done)) # Make sure we restrict memory size to specified limit if len(self.memory) > self.memory_size: self.memory.pop(0) def _construct_training_set(self, replay): # Select states and new states from replay states = np.array([a[0] for a in replay]) new_states = np.array([a[3] for a in replay]) # Predict the expected utility of current state and new state Q = self.model.predict(states) Q_new = self.model.predict(new_states) replay_size = len(replay) X = np.empty((replay_size, self.n_states)) y = np.empty((replay_size, self.n_actions)) # Construct training set for i in range(replay_size): state_r, action_r, reward_r, new_state_r, done_r = replay[i] target = Q[i] target[action_r] = reward_r # If we're done the utility is simply the reward of 
executing action a in # state s, otherwise we add the expected maximum future reward as well if not done_r: target[action_r] += self.gamma * np.amax(Q_new[i]) X[i] = state_r y[i] = target return X, y def train(self, n_epochs=500, batch_size=32): max_reward = 0 for epoch in range(n_epochs): state = self.env.reset() total_reward = 0 epoch_loss = [] while True: action = self._select_action(state) # Take a step new_state, reward, done, _ = self.env.step(action) self._memorize(state, action, reward, new_state, done) # Sample replay batch from memory _batch_size = min(len(self.memory), batch_size) replay = random.sample(self.memory, _batch_size) # Construct training set from replay X, y = self._construct_training_set(replay) # Learn control policy loss = self.model.train_on_batch(X, y) epoch_loss.append(loss) total_reward += reward state = new_state if done: break epoch_loss = np.mean(epoch_loss) # Reduce the epsilon parameter self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp(-self.decay_rate * epoch) max_reward = max(max_reward, total_reward) print ("%d [Loss: %.4f, Reward: %s, Epsilon: %.4f, Max Reward: %s]" % (epoch, epoch_loss, total_reward, self.epsilon, max_reward)) print ("Training Finished") def play(self, n_epochs): # self.env = gym.wrappers.Monitor(self.env, '/tmp/cartpole-experiment-1', force=True) for epoch in range(n_epochs): state = self.env.reset() total_reward = 0 while True: self.env.render() action = np.argmax(self.model.predict(state), axis=1)[0] state, reward, done, _ = self.env.step(action) total_reward += reward if done: break print ("%d Reward: %s" % (epoch, total_reward)) self.env.close() ================================================ FILE: mlfromscratch/supervised_learning/__init__.py ================================================ from .adaboost import Adaboost from .bayesian_regression import BayesianRegression from .decision_tree import RegressionTree, ClassificationTree, XGBoostRegressionTree from .gradient_boosting 
class Adaboost():
    """Boosting method that uses a number of weak classifiers in
    ensemble to make a strong classifier. This implementation uses decision
    stumps, which is a one level Decision Tree.

    Labels are expected to be in {-1, 1}.

    Parameters:
    -----------
    n_clf: int
        The number of weak classifiers that will be used.
    """
    def __init__(self, n_clf=5):
        self.n_clf = n_clf

    @staticmethod
    def _stump_predictions(X, feature_index, threshold, polarity):
        """Apply a stump's decision rule to every sample in X.

        A sample is labeled -1 when
        polarity * value < polarity * threshold and 1 otherwise. Both fit
        and predict go through this helper so that the rule that is scored
        during training is exactly the rule that is applied afterwards.
        """
        predictions = np.ones(np.shape(X)[0])
        negative_idx = (polarity * X[:, feature_index] < polarity * threshold)
        predictions[negative_idx] = -1
        return predictions

    def fit(self, X, y):
        """Fit n_clf decision stumps on X (n_samples, n_features) and y."""
        n_samples, n_features = np.shape(X)

        # Initialize weights to 1/N
        w = np.full(n_samples, (1.0 / n_samples))

        self.clfs = []
        # Iterate through classifiers
        for _ in range(self.n_clf):
            clf = DecisionStump()
            # Minimum weighted error found so far for this stump
            min_error = float('inf')
            # Try every unique value of every feature as threshold, with
            # both polarities
            for feature_i in range(n_features):
                unique_values = np.unique(X[:, feature_i])
                for threshold in unique_values:
                    for p in (1, -1):
                        prediction = self._stump_predictions(
                            X, feature_i, threshold, p)
                        # Error = sum of weights of misclassified samples.
                        # It is measured on the rule that will really be
                        # used. The original assumed error(-1) == 1 -
                        # error(+1), which does not hold for samples that
                        # sit exactly on the threshold.
                        error = np.sum(w[y != prediction])

                        # If this configuration resulted in the smallest
                        # error we save it
                        if error < min_error:
                            clf.polarity = p
                            clf.threshold = threshold
                            clf.feature_index = feature_i
                            min_error = error

            # Calculate the alpha which is used to update the sample weights,
            # Alpha is also an approximation of this classifier's proficiency.
            # The small constants keep the logarithm finite at error 0 and 1.
            clf.alpha = 0.5 * math.log((1.0 - min_error + 1e-10) / (min_error + 1e-10))
            predictions = self._stump_predictions(
                X, clf.feature_index, clf.threshold, clf.polarity)
            # Calculate new weights
            # Missclassified samples gets larger weights and correctly
            # classified samples smaller
            w *= np.exp(-clf.alpha * y * predictions)
            # Normalize to one
            w /= np.sum(w)

            # Save classifier
            self.clfs.append(clf)

    def predict(self, X):
        """Return the sign of the alpha-weighted sum of stump predictions.

        The result is a one-dimensional array with one entry per sample.
        """
        n_samples = np.shape(X)[0]
        y_pred = np.zeros(n_samples)
        # For each classifier => label the samples
        for clf in self.clfs:
            predictions = self._stump_predictions(
                X, clf.feature_index, clf.threshold, clf.polarity)
            # Add predictions weighted by the classifiers alpha
            # (alpha indicative of classifier's proficiency)
            y_pred += clf.alpha * predictions

        # Return sign of prediction sum
        return np.sign(y_pred).flatten()
If poly_degree is specified the features will be transformed to with a polynomial basis function, which allows for polynomial regression. Assumes Normal prior and likelihood for the weights and scaled inverse chi-squared prior and likelihood for the variance of the weights. Parameters: ----------- n_draws: float The number of simulated draws from the posterior of the parameters. mu0: array The mean values of the prior Normal distribution of the parameters. omega0: array The precision matrix of the prior Normal distribution of the parameters. nu0: float The degrees of freedom of the prior scaled inverse chi squared distribution. sigma_sq0: float The scale parameter of the prior scaled inverse chi squared distribution. poly_degree: int The polynomial degree that the features should be transformed to. Allows for polynomial regression. cred_int: float The credible interval (ETI in this impl.). 95 => 95% credible interval of the posterior of the parameters. Reference: https://github.com/mattiasvillani/BayesLearnCourse/raw/master/Slides/BayesLearnL5.pdf """ def __init__(self, n_draws, mu0, omega0, nu0, sigma_sq0, poly_degree=0, cred_int=95): self.w = None self.n_draws = n_draws self.poly_degree = poly_degree self.cred_int = cred_int # Prior parameters self.mu0 = mu0 self.omega0 = omega0 self.nu0 = nu0 self.sigma_sq0 = sigma_sq0 # Allows for simulation from the scaled inverse chi squared # distribution. Assumes the variance is distributed according to # this distribution. 
# Reference: # https://en.wikipedia.org/wiki/Scaled_inverse_chi-squared_distribution def _draw_scaled_inv_chi_sq(self, n, df, scale): X = chi2.rvs(size=n, df=df) sigma_sq = df * scale / X return sigma_sq def fit(self, X, y): # If polynomial transformation if self.poly_degree: X = polynomial_features(X, degree=self.poly_degree) n_samples, n_features = np.shape(X) X_X = X.T.dot(X) # Least squares approximate of beta beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y) # The posterior parameters can be determined analytically since we assume # conjugate priors for the likelihoods. # Normal prior / likelihood => Normal posterior mu_n = np.linalg.pinv(X_X + self.omega0).dot(X_X.dot(beta_hat)+self.omega0.dot(self.mu0)) omega_n = X_X + self.omega0 # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior nu_n = self.nu0 + n_samples sigma_sq_n = (1.0/nu_n)*(self.nu0*self.sigma_sq0 + \ (y.T.dot(y) + self.mu0.T.dot(self.omega0).dot(self.mu0) - mu_n.T.dot(omega_n.dot(mu_n)))) # Simulate parameter values for n_draws beta_draws = np.empty((self.n_draws, n_features)) for i in range(self.n_draws): sigma_sq = self._draw_scaled_inv_chi_sq(n=1, df=nu_n, scale=sigma_sq_n) beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq*np.linalg.pinv(omega_n)) # Save parameter draws beta_draws[i, :] = beta # Select the mean of the simulated variables as the ones used to make predictions self.w = np.mean(beta_draws, axis=0) # Lower and upper boundary of the credible interval l_eti = 50 - self.cred_int/2 u_eti = 50 + self.cred_int/2 self.eti = np.array([[np.percentile(beta_draws[:,i], q=l_eti), np.percentile(beta_draws[:,i], q=u_eti)] \ for i in range(n_features)]) def predict(self, X, eti=False): # If polynomial transformation if self.poly_degree: X = polynomial_features(X, degree=self.poly_degree) y_pred = X.dot(self.w) # If the lower and upper boundaries for the 95% # equal tail interval should be returned if eti: lower_w = self.eti[:, 0] upper_w = 
class DecisionNode():
    """A single node of a decision tree: either a decision node or a leaf.

    Parameters:
    -----------
    feature_i: int
        Index of the feature whose value is compared against the threshold.
    threshold: float
        The value that the feature at feature_i is compared against.
    value: float
        The prediction stored in the node when it is a leaf (class if
        classification tree, float if regression tree).
    true_branch: DecisionNode
        Subtree followed by samples whose feature value met the threshold.
    false_branch: DecisionNode
        Subtree followed by samples whose feature value did not meet the
        threshold.
    """
    def __init__(self, feature_i=None, threshold=None,
                 value=None, true_branch=None, false_branch=None):
        # The test performed at this node
        self.feature_i, self.threshold = feature_i, threshold
        # Prediction if the node is a leaf in the tree
        self.value = value
        # 'Left' and 'right' subtrees
        self.true_branch, self.false_branch = true_branch, false_branch
loss: function Loss function that is used for Gradient Boosting models to calculate impurity. """ def __init__(self, min_samples_split=2, min_impurity=1e-7, max_depth=float("inf"), loss=None): self.root = None # Root node in dec. tree # Minimum n of samples to justify split self.min_samples_split = min_samples_split # The minimum impurity to justify split self.min_impurity = min_impurity # The maximum depth to grow the tree to self.max_depth = max_depth # Function to calculate impurity (classif.=>info gain, regr=>variance reduct.) self._impurity_calculation = None # Function to determine prediction of y at leaf self._leaf_value_calculation = None # If y is one-hot encoded (multi-dim) or not (one-dim) self.one_dim = None # If Gradient Boost self.loss = loss def fit(self, X, y, loss=None): """ Build decision tree """ self.one_dim = len(np.shape(y)) == 1 self.root = self._build_tree(X, y) self.loss=None def _build_tree(self, X, y, current_depth=0): """ Recursive method which builds out the decision tree and splits X and respective y on the feature of X which (based on impurity) best separates the data""" largest_impurity = 0 best_criteria = None # Feature index and threshold best_sets = None # Subsets of the data # Check if expansion of y is needed if len(np.shape(y)) == 1: y = np.expand_dims(y, axis=1) # Add y as last column of X Xy = np.concatenate((X, y), axis=1) n_samples, n_features = np.shape(X) if n_samples >= self.min_samples_split and current_depth <= self.max_depth: # Calculate the impurity for each feature for feature_i in range(n_features): # All values of feature_i feature_values = np.expand_dims(X[:, feature_i], axis=1) unique_values = np.unique(feature_values) # Iterate through all unique values of feature column i and # calculate the impurity for threshold in unique_values: # Divide X and y depending on if the feature value of X at index feature_i # meets the threshold Xy1, Xy2 = divide_on_feature(Xy, feature_i, threshold) if len(Xy1) > 0 and len(Xy2) 
def predict_value(self, x, tree=None):
    """ Walk down the tree for sample x and return the value stored in the
    leaf that is reached. The walk starts at `tree`, or at the root when no
    node is given. """
    node = self.root if tree is None else tree

    # Descend until a node carrying a value (i.e. a leaf) is found
    while node.value is None:
        observed = x[node.feature_i]
        if isinstance(observed, (int, float)):
            # Numeric feature: true branch when the value reaches the threshold
            take_true = observed >= node.threshold
        else:
            # Any other feature type: true branch only on an exact match
            take_true = observed == node.threshold
        node = node.true_branch if take_true else node.false_branch

    return node.value
def print_tree(self, tree=None, indent=" "):
    """ Recursively print the decision tree, starting at `tree` (or at the
    root when no node is given). """
    node = tree if tree else self.root

    # Leaf => print the stored value and stop
    if node.value is not None:
        print(node.value)
        return

    # Decision node => print the test, then both outcomes one level deeper
    print("%s:%s? " % (node.feature_i, node.threshold))
    deeper_indent = indent + indent
    for tag, child in (("T", node.true_branch), ("F", node.false_branch)):
        print("%s%s->" % (indent, tag), end="")
        self.print_tree(child, deeper_indent)
class GradientBoosting(object):
    """Super class of GradientBoostingClassifier and GradientBoostingRegressor.
    Uses a collection of regression trees that trains on predicting the gradient
    of the loss function.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees that are used.
    learning_rate: float
        The step length that will be taken when following the negative gradient during
        training.
    min_samples_split: int
        The minimum number of samples needed to make a split when building a tree.
    min_impurity: float
        The minimum impurity required to split the tree further.
    max_depth: int
        The maximum depth of a tree.
    regression: boolean
        True or false depending on if we're doing regression or classification.
    """
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Baseline (mean of the training targets) that boosting starts from.
        # Set by fit() and added back in predict().
        self.initial_prediction = None

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = CrossEntropy()

        # Initialize regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(
                    min_samples_split=self.min_samples_split,
                    min_impurity=min_impurity,
                    max_depth=self.max_depth)
            self.trees.append(tree)

    def fit(self, X, y):
        """ Fit the trees one after another, each on the loss gradient of the
        running prediction, starting from the mean of y. """
        # Store the starting point so that predict() can reproduce it
        self.initial_prediction = np.mean(y, axis=0)
        y_pred = np.full(np.shape(y), self.initial_prediction)
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, y_pred)
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            # Update y prediction
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """ Predict targets (regression) or class indices (classification)
        for the samples in X. """
        # Start from the same baseline that fit() started from. Without it the
        # result would be offset by the training mean.
        baseline = getattr(self, "initial_prediction", None)
        if baseline is None:
            baseline = 0.0
        n_samples = np.shape(X)[0]
        y_pred = np.zeros((n_samples,) + np.shape(baseline)) + baseline

        # Apply the scaled update of every tree
        for tree in self.trees:
            update = tree.predict(X)
            y_pred = y_pred - np.multiply(self.learning_rate, update)

        if not self.regression:
            # Turn into probability distribution
            y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
            # Set label to the value that maximizes probability
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
def predict(self, X_test, X_train, y_train):
    """ Label every sample in X_test with the most common class among its
    k closest training samples (Euclidean distance).

    Parameters:
    -----------
    X_test: array-like, one row per sample to classify
    X_train: array-like, one row per training sample
    y_train: array-like, class label of each training sample

    Returns an array with one predicted label per row of X_test.
    """
    X_test = np.asarray(X_test)
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    y_pred = np.empty(X_test.shape[0])
    # Determine the class of each sample
    for row, test_sample in enumerate(X_test):
        # Distance from the test sample to every training sample at once
        distances = np.linalg.norm(X_train - test_sample, axis=1)
        # Indices of the k nearest training samples
        nearest = np.argsort(distances)[:self.k]
        # Label sample as the most common class label among those neighbours.
        # A separate name is used for the row index so that no inner loop
        # variable can overwrite it.
        y_pred[row] = self._vote(y_train[nearest])

    return y_pred
def fit(self, X, y, n_iterations=4000):
    """ Tune self.param on (X, y) for n_iterations rounds, either by
    gradient descent or, when self.gradient_descent is false, by the batch
    update based on the pseudo-inverse. """
    self._initialize_parameters(X)

    for _ in range(n_iterations):
        # Current prediction of the model
        linear = X.dot(self.param)
        y_pred = self.sigmoid(linear)

        if self.gradient_descent:
            # Step against the gradient of the loss w.r.t. the parameters
            residual = y - y_pred
            self.param -= self.learning_rate * -residual.dot(X)
            continue

        # Diagonal matrix built from the sigmoid gradient at the linear output
        diag_gradient = make_diagonal(self.sigmoid.gradient(linear))
        # Batch opt:
        inverse = np.linalg.pinv(X.T.dot(diag_gradient).dot(X))
        target = diag_gradient.dot(X).dot(self.param) + y - y_pred
        self.param = inverse.dot(X.T).dot(target)
class MultiClassLDA():
    """Enables dimensionality reduction for multiple
    class distributions. It transforms the features space into a space where
    the between class scatter is maximized and the within class scatter is
    minimized.

    Parameters:
    -----------
    solver: str
        If 'svd' we use the pseudo-inverse to calculate the inverse of matrices
        when doing the transformation. Any other value uses the plain inverse.
    """
    def __init__(self, solver="svd"):
        self.solver = solver

    def _calculate_scatter_matrices(self, X, y):
        """ Return the within class scatter matrix SW and the between class
        scatter matrix SB of the samples X with labels y. """
        X = np.asarray(X)
        y = np.asarray(y)
        n_features = np.shape(X)[1]
        labels = np.unique(y)
        total_mean = np.mean(X, axis=0)

        # Accumulators must start at zero (np.empty would leave them holding
        # arbitrary memory contents)
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))
        for label in labels:
            _X = X[y == label]
            _mean = np.mean(_X, axis=0)

            # Within class scatter matrix:
            # SW = sum{ (X_for_class - mean_of_X_for_class)^2 }
            # <=> (n_samples_X_for_class - 1) * covar(X_for_class)
            centered = _X - _mean
            SW += centered.T.dot(centered)

            # Between class scatter:
            # SB = sum{ n_samples_for_class * (mean_for_class - total_mean)^2 }
            # The square is an outer product (a matrix), not an inner product
            mean_diff = _mean - total_mean
            SB += len(_X) * np.outer(mean_diff, mean_diff)

        return SW, SB

    def transform(self, X, y, n_components):
        """ Project X onto the n_components directions with the largest
        eigenvalues of SW^-1 * SB. """
        SW, SB = self._calculate_scatter_matrices(X, y)

        # Determine SW^-1 * SB by calculating inverse of SW
        invert = np.linalg.pinv if self.solver == "svd" else np.linalg.inv
        A = invert(SW).dot(SB)

        # Get eigenvalues and eigenvectors of SW^-1 * SB. The product is in
        # general not symmetric, so the general solver is required; only the
        # real parts are kept.
        eigenvalues, eigenvectors = np.linalg.eig(A)
        eigenvalues = eigenvalues.real
        eigenvectors = eigenvectors.real

        # Sort the eigenvectors from largest to smallest eigenvalue and
        # select the first n_components
        idx = eigenvalues.argsort()[::-1]
        eigenvectors = eigenvectors[:, idx][:, :n_components]

        # Project the data onto eigenvectors
        X_transformed = np.asarray(X).dot(eigenvectors)

        return X_transformed

    def plot_in_2d(self, X, y, title=None):
        """ Plot the dataset X and the corresponding labels y in 2D using the LDA
        transformation."""
        X_transformed = self.transform(X, y, n_components=2)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        plt.scatter(x1, x2, c=y)
        if title:
            plt.title(title)
        plt.show()
""" def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01): self.n_hidden = n_hidden self.n_iterations = n_iterations self.learning_rate = learning_rate self.hidden_activation = Sigmoid() self.output_activation = Softmax() self.loss = CrossEntropy() def _initialize_weights(self, X, y): n_samples, n_features = X.shape _, n_outputs = y.shape # Hidden layer limit = 1 / math.sqrt(n_features) self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden)) self.w0 = np.zeros((1, self.n_hidden)) # Output layer limit = 1 / math.sqrt(self.n_hidden) self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs)) self.v0 = np.zeros((1, n_outputs)) def fit(self, X, y): self._initialize_weights(X, y) for i in range(self.n_iterations): # .............. # Forward Pass # .............. # HIDDEN LAYER hidden_input = X.dot(self.W) + self.w0 hidden_output = self.hidden_activation(hidden_input) # OUTPUT LAYER output_layer_input = hidden_output.dot(self.V) + self.v0 y_pred = self.output_activation(output_layer_input) # ............... # Backward Pass # ............... # OUTPUT LAYER # Grad. w.r.t input of output layer grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) grad_v = hidden_output.T.dot(grad_wrt_out_l_input) grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True) # HIDDEN LAYER # Grad. 
def predict(self, X):
    """ Run a forward pass through the hidden and the output layer and
    return the output layer's activations for X. """
    layers = (
        (self.W, self.w0, self.hidden_activation),
        (self.V, self.v0, self.output_activation),
    )
    signal = X
    # Each layer: affine transform followed by its activation
    for weights, bias, activation in layers:
        signal = activation(signal.dot(weights) + bias)
    return signal
""" def fit(self, X, y): self.X, self.y = X, y self.classes = np.unique(y) self.parameters = [] # Calculate the mean and variance of each feature for each class for i, c in enumerate(self.classes): # Only select the rows where the label equals the given class X_where_c = X[np.where(y == c)] self.parameters.append([]) # Add the mean and variance for each feature (column) for col in X_where_c.T: parameters = {"mean": col.mean(), "var": col.var()} self.parameters[i].append(parameters) def _calculate_likelihood(self, mean, var, x): """ Gaussian likelihood of the data x given mean and var """ eps = 1e-4 # Added in denominator to prevent division by zero coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps) exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + eps))) return coeff * exponent def _calculate_prior(self, c): """ Calculate the prior of class c (samples where class == c / total number of samples)""" frequency = np.mean(self.y == c) return frequency def _classify(self, sample): """ Classification using Bayes Rule P(Y|X) = P(X|Y)*P(Y)/P(X), or Posterior = Likelihood * Prior / Scaling Factor P(Y|X) - The posterior is the probability that sample x is of class y given the feature values of x being distributed according to distribution of y and the prior. P(X|Y) - Likelihood of data X given class distribution Y. Gaussian distribution (given by _calculate_likelihood) P(Y) - Prior (given by _calculate_prior) P(X) - Scales the posterior to make it a proper probability distribution. This term is ignored in this implementation since it doesn't affect which class distribution the sample is most likely to belong to. 
Classifies the sample as the class that results in the largest P(Y|X) (posterior) """ posteriors = [] # Go through list of classes for i, c in enumerate(self.classes): # Initialize posterior as prior posterior = self._calculate_prior(c) # Naive assumption (independence): # P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y) # Posterior is product of prior and likelihoods (ignoring scaling factor) for feature_value, params in zip(sample, self.parameters[i]): # Likelihood of feature value given distribution of feature values given y likelihood = self._calculate_likelihood(params["mean"], params["var"], feature_value) posterior *= likelihood posteriors.append(posterior) # Return the class with the largest posterior probability return self.classes[np.argmax(posteriors)] def predict(self, X): """ Predict the class labels of the samples in X """ y_pred = [self._classify(sample) for sample in X] return y_pred ================================================ FILE: mlfromscratch/supervised_learning/neuroevolution.py ================================================ from __future__ import print_function, division import numpy as np import copy class Neuroevolution(): """ Evolutionary optimization of Neural Networks. Parameters: ----------- n_individuals: int The number of neural networks that are allowed in the population at a time. mutation_rate: float The probability that a weight will be mutated. model_builder: method A method which returns a user specified NeuralNetwork instance. 
""" def __init__(self, population_size, mutation_rate, model_builder): self.population_size = population_size self.mutation_rate = mutation_rate self.model_builder = model_builder def _build_model(self, id): """ Returns a new individual """ model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1]) model.id = id model.fitness = 0 model.accuracy = 0 return model def _initialize_population(self): """ Initialization of the neural networks forming the population""" self.population = [] for _ in range(self.population_size): model = self._build_model(id=np.random.randint(1000)) self.population.append(model) def _mutate(self, individual, var=1): """ Add zero mean gaussian noise to the layer weights with probability mutation_rate """ for layer in individual.layers: if hasattr(layer, 'W'): # Mutation of weight with probability self.mutation_rate mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.W.shape) layer.W += np.random.normal(loc=0, scale=var, size=layer.W.shape) * mutation_mask mutation_mask = np.random.binomial(1, p=self.mutation_rate, size=layer.w0.shape) layer.w0 += np.random.normal(loc=0, scale=var, size=layer.w0.shape) * mutation_mask return individual def _inherit_weights(self, child, parent): """ Copies the weights from parent to child """ for i in range(len(child.layers)): if hasattr(child.layers[i], 'W'): # The child inherits both weights W and bias weights w0 child.layers[i].W = parent.layers[i].W.copy() child.layers[i].w0 = parent.layers[i].w0.copy() def _crossover(self, parent1, parent2): """ Performs crossover between the neurons in parent1 and parent2 to form offspring """ child1 = self._build_model(id=parent1.id+1) self._inherit_weights(child1, parent1) child2 = self._build_model(id=parent2.id+1) self._inherit_weights(child2, parent2) # Perform crossover for i in range(len(child1.layers)): if hasattr(child1.layers[i], 'W'): n_neurons = child1.layers[i].W.shape[1] # Perform crossover between the individuals' 
def evolve(self, X, y, n_generations):
    """ Will evolve the population for n_generations based on dataset X and labels y.

    Returns the fittest individual of the last evaluated generation, or
    None when n_generations is zero.
    """
    self.X, self.y = X, y

    self._initialize_population()

    # The 40% fittest individuals are carried over unchanged to the next generation
    n_winners = int(self.population_size * 0.4)
    # The remaining slots are filled with offspring, two per pair of parents
    n_parents = self.population_size - n_winners
    # Parents are consumed in pairs, so an odd count would leave one parent
    # without a partner. Keep one more winner instead, so the population size
    # stays constant.
    if n_parents % 2:
        n_winners += 1
        n_parents -= 1

    fittest_individual = None
    for epoch in range(n_generations):
        # Determine the fitness of the individuals in the population
        self._calculate_fitness()

        # Sort population by fitness
        sorted_i = np.argsort([model.fitness for model in self.population])[::-1]
        self.population = [self.population[i] for i in sorted_i]

        # Get the individual with the highest fitness
        fittest_individual = self.population[0]
        print ("[%d Best Individual - Fitness: %.5f, Accuracy: %.1f%%]" % (epoch,
                                                                    fittest_individual.fitness,
                                                                    float(100*fittest_individual.accuracy)))
        # The 'winners' are selected for the next generation
        next_population = [self.population[i] for i in range(n_winners)]

        total_fitness = np.sum([model.fitness for model in self.population])
        # The probability that a individual will be selected as a parent is proportionate to its fitness
        parent_probabilities = [model.fitness / total_fitness for model in self.population]
        # Select parents according to probabilities (without replacement to preserve diversity).
        # Indices are drawn rather than the objects themselves so that the
        # models never have to be packed into a numpy array.
        parent_idx = np.random.choice(len(self.population), size=n_parents,
                                      p=parent_probabilities, replace=False)
        parents = [self.population[i] for i in parent_idx]
        for first, second in zip(parents[0::2], parents[1::2]):
            # Perform crossover to produce offspring
            child1, child2 = self._crossover(first, second)
            # Save mutated offspring for next population
            next_population += [self._mutate(child1), self._mutate(child2)]

        self.population = next_population

    return fittest_individual
Reference: Neural Network Training Using Particle Swarm Optimization https://visualstudiomagazine.com/articles/2013/12/01/neural-network-training-using-particle-swarm-optimization.aspx """ def __init__(self, population_size, model_builder, inertia_weight=0.8, cognitive_weight=2, social_weight=2, max_velocity=20): self.population_size = population_size self.model_builder = model_builder self.best_individual = None # Parameters used to update velocity self.cognitive_w = cognitive_weight self.inertia_w = inertia_weight self.social_w = social_weight self.min_v = -max_velocity self.max_v = max_velocity def _build_model(self, id): """ Returns a new individual """ model = self.model_builder(n_inputs=self.X.shape[1], n_outputs=self.y.shape[1]) model.id = id model.fitness = 0 model.highest_fitness = 0 model.accuracy = 0 # Set intial best as the current initialization model.best_layers = copy.copy(model.layers) # Set initial velocity to zero model.velocity = [] for layer in model.layers: velocity = {"W": 0, "w0": 0} if hasattr(layer, 'W'): velocity = {"W": np.zeros_like(layer.W), "w0": np.zeros_like(layer.w0)} model.velocity.append(velocity) return model def _initialize_population(self): """ Initialization of the neural networks forming the population""" self.population = [] for i in range(self.population_size): model = self._build_model(id=i) self.population.append(model) def _update_weights(self, individual): """ Calculate the new velocity and update weights for each layer """ # Two random parameters used to update the velocity r1 = np.random.uniform() r2 = np.random.uniform() for i, layer in enumerate(individual.layers): if hasattr(layer, 'W'): # Layer weights velocity first_term_W = self.inertia_w * individual.velocity[i]["W"] second_term_W = self.cognitive_w * r1 * (individual.best_layers[i].W - layer.W) third_term_W = self.social_w * r2 * (self.best_individual.layers[i].W - layer.W) new_velocity = first_term_W + second_term_W + third_term_W individual.velocity[i]["W"] 
def evolve(self, X, y, n_generations):
    """ Will evolve the population for n_generations based on dataset X and labels y.

    Returns the best individual recorded over all generations.
    """
    self.X, self.y = X, y

    self._initialize_population()

    # The best individual of the population is initialized as population's first ind.
    # Snapshots are deep copies: the weight update changes the layers in
    # place, so a shallow copy would silently follow the live individual
    # instead of preserving the recorded best.
    self.best_individual = copy.deepcopy(self.population[0])

    for epoch in range(n_generations):
        for individual in self.population:
            # Calculate new velocity and update the NN weights
            self._update_weights(individual)
            # Calculate the fitness of the updated individual
            self._calculate_fitness(individual)

            # If the current fitness is higher than the individual's previous highest
            # => update the individual's best layer setup
            if individual.fitness > individual.highest_fitness:
                individual.best_layers = copy.deepcopy(individual.layers)
                individual.highest_fitness = individual.fitness
            # If the individual's fitness is higher than the highest recorded fitness for the
            # whole population => update the best individual
            if individual.fitness > self.best_individual.fitness:
                self.best_individual = copy.deepcopy(individual)

        print ("[%d Best Individual - ID: %d Fitness: %.5f, Accuracy: %.1f%%]" % (epoch,
                                                                    self.best_individual.id,
                                                                    self.best_individual.fitness,
                                                                    100*float(self.best_individual.accuracy)))
    return self.best_individual
Possible choices: Sigmoid, ExpLU, ReLU, LeakyReLU, SoftPlus, TanH loss: class The loss function used to assess the model's performance. Possible choices: SquareLoss, CrossEntropy learning_rate: float The step length that will be used when updating the weights. """ def __init__(self, n_iterations=20000, activation_function=Sigmoid, loss=SquareLoss, learning_rate=0.01): self.n_iterations = n_iterations self.learning_rate = learning_rate self.loss = loss() self.activation_func = activation_function() self.progressbar = progressbar.ProgressBar(widgets=bar_widgets) def fit(self, X, y): n_samples, n_features = np.shape(X) _, n_outputs = np.shape(y) # Initialize weights between [-1/sqrt(N), 1/sqrt(N)] limit = 1 / math.sqrt(n_features) self.W = np.random.uniform(-limit, limit, (n_features, n_outputs)) self.w0 = np.zeros((1, n_outputs)) for i in self.progressbar(range(self.n_iterations)): # Calculate outputs linear_output = X.dot(self.W) + self.w0 y_pred = self.activation_func(linear_output) # Calculate the loss gradient w.r.t the input of the activation function error_gradient = self.loss.gradient(y, y_pred) * self.activation_func.gradient(linear_output) # Calculate the gradient of the loss with respect to each weight grad_wrt_w = X.T.dot(error_gradient) grad_wrt_w0 = np.sum(error_gradient, axis=0, keepdims=True) # Update weights self.W -= self.learning_rate * grad_wrt_w self.w0 -= self.learning_rate * grad_wrt_w0 # Use the trained model to predict labels of X def predict(self, X): y_pred = self.activation_func(X.dot(self.W) + self.w0) return y_pred ================================================ FILE: mlfromscratch/supervised_learning/random_forest.py ================================================ from __future__ import division, print_function import numpy as np import math import progressbar # Import helper functions from mlfromscratch.utils import divide_on_feature, train_test_split, get_random_subsets, normalize from mlfromscratch.utils import accuracy_score, 
class RandomForest():
    """Random Forest classifier. Uses a collection of classification trees that
    trains on random subsets of the data using a random subsets of the features.

    Parameters:
    -----------
    n_estimators: int
        The number of classification trees that are used.
    max_features: int
        The maximum number of features that the classification trees are allowed to
        use. If not set it becomes int(sqrt(n_features)) on the first call to fit.
    min_samples_split: int
        The minimum number of samples needed to make a split when building a tree.
    min_gain: float
        The minimum impurity required to split the tree further.
    max_depth: int
        The maximum depth of a tree.
    """
    def __init__(self, n_estimators=100, max_features=None, min_samples_split=2,
                 min_gain=0, max_depth=float("inf")):
        self.n_estimators = n_estimators    # Number of trees
        self.max_features = max_features    # Maximum number of features per tree
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain            # Minimum information gain req. to continue
        self.max_depth = max_depth          # Maximum depth for tree
        self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)

        # Initialize decision trees
        self.trees = [
            ClassificationTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_gain,
                max_depth=self.max_depth)
            for _ in range(n_estimators)]

    def fit(self, X, y):
        """ Fit every tree on its own random subset of samples and features. """
        n_features = np.shape(X)[1]
        # If max_features have not been defined => select it as
        # sqrt(n_features)
        if not self.max_features:
            self.max_features = int(math.sqrt(n_features))
        # Never ask for more distinct features than the data has
        n_selected = min(self.max_features, n_features)

        # Choose one random subset of the data for each tree
        subsets = get_random_subsets(X, y, self.n_estimators)

        for i in self.progressbar(range(self.n_estimators)):
            X_subset, y_subset = subsets[i]
            # Feature bagging: draw the feature indices WITHOUT replacement so a
            # tree never receives the same column twice (duplicates add no
            # information and shrink the effective number of features).
            idx = np.random.choice(n_features, size=n_selected, replace=False)
            # Save the indices of the features for prediction
            self.trees[i].feature_indices = idx
            # Fit the tree on the selected feature columns only
            self.trees[i].fit(X_subset[:, idx], y_subset)

    def predict(self, X):
        """ Return, for each sample, the class predicted by most trees (as a list). """
        y_preds = np.empty((X.shape[0], len(self.trees)))
        # Let each tree make a prediction on the features it was trained on
        for i, tree in enumerate(self.trees):
            y_preds[:, i] = tree.predict(X[:, tree.feature_indices])

        # Majority vote per sample
        return [np.bincount(votes.astype('int')).argmax() for votes in y_preds]
class l1_regularization():
    """ Regularization for Lasso Regression (alpha * sum of absolute weights) """
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, w):
        # ord=1 is required: np.linalg.norm defaults to the L2 norm
        return self.alpha * np.linalg.norm(w, 1)

    def grad(self, w):
        return self.alpha * np.sign(w)


class l2_regularization():
    """ Regularization for Ridge Regression (alpha * 0.5 * w^T w) """
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, w):
        return self.alpha * 0.5 * w.T.dot(w)

    def grad(self, w):
        return self.alpha * w


class l1_l2_regularization():
    """ Regularization for Elastic Net Regression.

    l1_ratio weighs the l1 term, (1 - l1_ratio) the l2 term.
    """
    def __init__(self, alpha, l1_ratio=0.5):
        self.alpha = alpha
        self.l1_ratio = l1_ratio

    def __call__(self, w):
        # ord=1 is required: np.linalg.norm defaults to the L2 norm
        l1_contr = self.l1_ratio * np.linalg.norm(w, 1)
        l2_contr = (1 - self.l1_ratio) * 0.5 * w.T.dot(w)
        return self.alpha * (l1_contr + l2_contr)

    def grad(self, w):
        l1_contr = self.l1_ratio * np.sign(w)
        l2_contr = (1 - self.l1_ratio) * w
        return self.alpha * (l1_contr + l2_contr)


class _NoRegularization():
    """ Penalty-free stand-in exposing the same interface as the regularizers """
    def __call__(self, w):
        return 0

    def grad(self, w):
        return 0


class Regression(object):
    """ Base regression model. Models the relationship between a scalar dependent variable y
    and the independent variables X. Subclasses must set `self.regularization` to an object
    that is callable on the weights and has a `grad(w)` method.

    Parameters:
    -----------
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_iterations, learning_rate):
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate

    def initialize_weights(self, n_features):
        """ Initialize weights uniformly in [-1/sqrt(N), 1/sqrt(N)] """
        limit = 1 / math.sqrt(n_features)
        self.w = np.random.uniform(-limit, limit, (n_features, ))

    def fit(self, X, y):
        """ Fit the weights by gradient descent; per-iteration losses are
        stored in `self.training_errors`. """
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        self.training_errors = []
        self.initialize_weights(n_features=X.shape[1])

        # Do gradient descent for n_iterations
        for i in range(self.n_iterations):
            y_pred = X.dot(self.w)
            # Calculate l2 loss (plus the regularization penalty)
            mse = np.mean(0.5 * (y - y_pred)**2 + self.regularization(self.w))
            self.training_errors.append(mse)
            # Gradient of l2 loss w.r.t w
            grad_w = -(y - y_pred).dot(X) + self.regularization.grad(self.w)
            # Update the weights
            self.w -= self.learning_rate * grad_w

    def predict(self, X):
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        y_pred = X.dot(self.w)
        return y_pred


class LinearRegression(Regression):
    """Linear model.

    Parameters:
    -----------
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    gradient_descent: boolean
        True or false depending if gradient descent should be used when training. If
        false then we use batch optimization by least squares.
    """
    def __init__(self, n_iterations=100, learning_rate=0.001, gradient_descent=True):
        self.gradient_descent = gradient_descent
        # No regularization
        self.regularization = _NoRegularization()
        super(LinearRegression, self).__init__(n_iterations=n_iterations,
                                               learning_rate=learning_rate)

    def fit(self, X, y):
        # If not gradient descent => Least squares approximation of w
        if not self.gradient_descent:
            # Insert constant ones for bias weights
            X = np.insert(X, 0, 1, axis=1)
            # Calculate weights by least squares (using Moore-Penrose pseudoinverse).
            # np.linalg.svd returns A = U * diag(S) * Vt, hence
            # pinv(A) = Vt^T * pinv(diag(S)) * U^T.
            U, S, Vt = np.linalg.svd(X.T.dot(X))
            S = np.diag(S)
            X_sq_reg_inv = Vt.T.dot(np.linalg.pinv(S)).dot(U.T)
            self.w = X_sq_reg_inv.dot(X.T).dot(y)
        else:
            super(LinearRegression, self).fit(X, y)


class LassoRegression(Regression):
    """Linear regression model with an l1 regularization factor which does both variable
    selection and regularization. Model that tries to balance the fit of the model with
    respect to the training data and the complexity of the model. A large regularization
    factor decreases the variance of the model.

    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01):
        self.degree = degree
        self.regularization = l1_regularization(alpha=reg_factor)
        super(LassoRegression, self).__init__(n_iterations,
                                              learning_rate)

    def fit(self, X, y):
        X = normalize(polynomial_features(X, degree=self.degree))
        super(LassoRegression, self).fit(X, y)

    def predict(self, X):
        X = normalize(polynomial_features(X, degree=self.degree))
        return super(LassoRegression, self).predict(X)


class PolynomialRegression(Regression):
    """Performs a non-linear transformation of the data before fitting the model
    and doing predictions which allows for doing non-linear regression.

    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree, n_iterations=3000, learning_rate=0.001):
        self.degree = degree
        # No regularization
        self.regularization = _NoRegularization()
        super(PolynomialRegression, self).__init__(n_iterations=n_iterations,
                                                   learning_rate=learning_rate)

    def fit(self, X, y):
        X = polynomial_features(X, degree=self.degree)
        super(PolynomialRegression, self).fit(X, y)

    def predict(self, X):
        X = polynomial_features(X, degree=self.degree)
        return super(PolynomialRegression, self).predict(X)


class RidgeRegression(Regression):
    """Also referred to as Tikhonov regularization. Linear regression model with an l2
    regularization factor. Model that tries to balance the fit of the model with respect
    to the training data and the complexity of the model. A large regularization factor
    decreases the variance of the model.

    Parameters:
    -----------
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, reg_factor, n_iterations=1000, learning_rate=0.001):
        self.regularization = l2_regularization(alpha=reg_factor)
        super(RidgeRegression, self).__init__(n_iterations,
                                              learning_rate)


class PolynomialRidgeRegression(Regression):
    """Similar to regular ridge regression except that the data is transformed to allow
    for polynomial regression.

    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    gradient_descent: boolean
        Accepted for backward compatibility; not used by this class.
    """
    def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01,
                 gradient_descent=True):
        self.degree = degree
        self.regularization = l2_regularization(alpha=reg_factor)
        super(PolynomialRidgeRegression, self).__init__(n_iterations,
                                                        learning_rate)

    def fit(self, X, y):
        X = normalize(polynomial_features(X, degree=self.degree))
        super(PolynomialRidgeRegression, self).fit(X, y)

    def predict(self, X):
        X = normalize(polynomial_features(X, degree=self.degree))
        return super(PolynomialRidgeRegression, self).predict(X)


class ElasticNet(Regression):
    """ Regression where a combination of l1 and l2 regularization are used. The
    ratio of their contributions are set with the 'l1_ratio' parameter.

    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    l1_ratio: float
        Weighs the contribution of l1 and l2 regularization.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree=1, reg_factor=0.05, l1_ratio=0.5, n_iterations=3000,
                 learning_rate=0.01):
        self.degree = degree
        self.regularization = l1_l2_regularization(alpha=reg_factor, l1_ratio=l1_ratio)
        super(ElasticNet, self).__init__(n_iterations,
                                         learning_rate)

    def fit(self, X, y):
        X = normalize(polynomial_features(X, degree=self.degree))
        super(ElasticNet, self).fit(X, y)

    def predict(self, X):
        X = normalize(polynomial_features(X, degree=self.degree))
        return super(ElasticNet, self).predict(X)
""" def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4): self.C = C self.kernel = kernel self.power = power self.gamma = gamma self.coef = coef self.lagr_multipliers = None self.support_vectors = None self.support_vector_labels = None self.intercept = None def fit(self, X, y): n_samples, n_features = np.shape(X) # Set gamma to 1/n_features by default if not self.gamma: self.gamma = 1 / n_features # Initialize kernel method with parameters self.kernel = self.kernel( power=self.power, gamma=self.gamma, coef=self.coef) # Calculate kernel matrix kernel_matrix = np.zeros((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): kernel_matrix[i, j] = self.kernel(X[i], X[j]) # Define the quadratic optimization problem P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d') q = cvxopt.matrix(np.ones(n_samples) * -1) A = cvxopt.matrix(y, (1, n_samples), tc='d') b = cvxopt.matrix(0, tc='d') if not self.C: G = cvxopt.matrix(np.identity(n_samples) * -1) h = cvxopt.matrix(np.zeros(n_samples)) else: G_max = np.identity(n_samples) * -1 G_min = np.identity(n_samples) G = cvxopt.matrix(np.vstack((G_max, G_min))) h_max = cvxopt.matrix(np.zeros(n_samples)) h_min = cvxopt.matrix(np.ones(n_samples) * self.C) h = cvxopt.matrix(np.vstack((h_max, h_min))) # Solve the quadratic optimization problem using cvxopt minimization = cvxopt.solvers.qp(P, q, G, h, A, b) # Lagrange multipliers lagr_mult = np.ravel(minimization['x']) # Extract support vectors # Get indexes of non-zero lagr. multipiers idx = lagr_mult > 1e-7 # Get the corresponding lagr. 
class LogisticLoss():
    """ Logistic (log) loss on raw scores.

    `y_pred` is the untransformed score; it is mapped to a probability with
    the sigmoid stored in `self.log_func` before the loss, gradient and
    hessian are evaluated.
    """
    def __init__(self):
        sigmoid = Sigmoid()
        self.log_func = sigmoid
        self.log_grad = sigmoid.gradient

    def loss(self, y, y_pred):
        # Clip the probability (not the raw score) so the logarithms stay finite
        p = np.clip(self.log_func(y_pred), 1e-15, 1 - 1e-15)
        # Negative log-likelihood: this is the function whose derivative
        # w.r.t. y_pred is returned by `gradient`
        return -(y * np.log(p) + (1 - y) * np.log(1 - p))

    # gradient w.r.t y_pred
    def gradient(self, y, y_pred):
        p = self.log_func(y_pred)
        return -(y - p)

    # second derivative w.r.t y_pred
    def hess(self, y, y_pred):
        p = self.log_func(y_pred)
        return p * (1 - p)
XGBoost classifier. Reference: http://xgboost.readthedocs.io/en/latest/model.html Parameters: ----------- n_estimators: int The number of classification trees that are used. learning_rate: float The step length that will be taken when following the negative gradient during training. min_samples_split: int The minimum number of samples needed to make a split when building a tree. min_impurity: float The minimum impurity required to split the tree further. max_depth: int The maximum depth of a tree. """ def __init__(self, n_estimators=200, learning_rate=0.001, min_samples_split=2, min_impurity=1e-7, max_depth=2): self.n_estimators = n_estimators # Number of trees self.learning_rate = learning_rate # Step size for weight update self.min_samples_split = min_samples_split # The minimum n of sampels to justify split self.min_impurity = min_impurity # Minimum variance reduction to continue self.max_depth = max_depth # Maximum depth for tree self.bar = progressbar.ProgressBar(widgets=bar_widgets) # Log loss for classification self.loss = LogisticLoss() # Initialize regression trees self.trees = [] for _ in range(n_estimators): tree = XGBoostRegressionTree( min_samples_split=self.min_samples_split, min_impurity=min_impurity, max_depth=self.max_depth, loss=self.loss) self.trees.append(tree) def fit(self, X, y): y = to_categorical(y) y_pred = np.zeros(np.shape(y)) for i in self.bar(range(self.n_estimators)): tree = self.trees[i] y_and_pred = np.concatenate((y, y_pred), axis=1) tree.fit(X, y_and_pred) update_pred = tree.predict(X) y_pred -= np.multiply(self.learning_rate, update_pred) def predict(self, X): y_pred = None # Make predictions for tree in self.trees: # Estimate gradient and update prediction update_pred = tree.predict(X) if y_pred is None: y_pred = np.zeros_like(update_pred) y_pred -= np.multiply(self.learning_rate, update_pred) # Turn into probability distribution (Softmax) y_pred = np.exp(y_pred) / np.sum(np.exp(y_pred), axis=1, keepdims=True) # Set label to the 
class Rule():
    """ Association rule `antecedent -> concequent` with its confidence and support """
    def __init__(self, antecedent, concequent, confidence, support):
        self.antecedent = antecedent
        self.concequent = concequent
        self.confidence = confidence
        self.support = support


class Apriori():
    """A method for determining frequent itemsets in a transactional database and
    also for generating rules for those itemsets.

    A single item is any hashable, orderable value that is not a list (e.g. an
    int or a string); an itemset of two or more items is a list of items.

    Parameters:
    -----------
    min_sup: float
        The minimum fraction of transactions an itemset needs to
        occur in to be deemed frequent
    min_conf: float
        The minimum fraction of times the antecedent needs to imply
        the concequent to justify rule
    """
    def __init__(self, min_sup=0.3, min_conf=0.81):
        self.min_sup = min_sup
        self.min_conf = min_conf
        self.freq_itemsets = None       # List of frequent itemsets
        self.transactions = None        # List of transactions

    def _calculate_support(self, itemset):
        """ Fraction of the transactions that contain every item in itemset """
        count = sum(1 for transaction in self.transactions
                    if self._transaction_contains_items(transaction, itemset))
        return count / len(self.transactions)

    def _get_frequent_itemsets(self, candidates):
        """ Prunes the candidates that are not frequent => returns list with
        only frequent itemsets """
        return [itemset for itemset in candidates
                if self._calculate_support(itemset) >= self.min_sup]

    def _has_infrequent_itemsets(self, candidate):
        """ True or false depending on the candidate has any
        subset with size k - 1 that is not in the frequent itemset """
        k = len(candidate)
        # Find all combinations of size k-1 in candidate
        # E.g [1,2,3] => [[1,2],[1,3],[2,3]]
        for t in itertools.combinations(candidate, k - 1):
            # t - is tuple. If size == 1 get the element
            subset = list(t) if len(t) > 1 else t[0]
            if subset not in self.freq_itemsets[-1]:
                return True
        return False

    def _generate_candidates(self, freq_itemset):
        """ Joins the elements in the frequent itemset and prunes
        resulting sets if they contain subsets that have been determined
        to be infrequent. """
        candidates = []
        for itemset1 in freq_itemset:
            # Anything that is not a list is one single item (int, str, ...)
            single_item = not isinstance(itemset1, list)
            for itemset2 in freq_itemset:
                # Valid if every element but the last are the same
                # and the last element in itemset1 is smaller than the last
                # in itemset2
                if single_item:
                    valid = itemset1 < itemset2
                else:
                    valid = (itemset1[:-1] == itemset2[:-1]
                             and itemset1[-1] < itemset2[-1])
                if not valid:
                    continue
                # JOIN: Add the last element in itemset2 to itemset1 to
                # create a new candidate
                if single_item:
                    candidate = [itemset1, itemset2]
                else:
                    candidate = itemset1 + [itemset2[-1]]
                # PRUNE: Drop the candidate if any of its subsets has been
                # determined to be infrequent
                if not self._has_infrequent_itemsets(candidate):
                    candidates.append(candidate)
        return candidates

    def _transaction_contains_items(self, transaction, items):
        """ True or false depending on each item in the itemset is
        in the transaction """
        # If items is in fact only one item
        if not isinstance(items, list):
            return items in transaction
        # Make sure that all items are in the transaction
        return all(item in transaction for item in items)

    def find_frequent_itemsets(self, transactions):
        """ Returns the set of frequent itemsets in the list of transactions """
        self.transactions = transactions
        # Get all unique items in the transactions
        unique_items = set(item for transaction in self.transactions for item in transaction)
        # Get the frequent items
        self.freq_itemsets = [self._get_frequent_itemsets(unique_items)]
        while True:
            # Generate new candidates from last added frequent itemsets
            candidates = self._generate_candidates(self.freq_itemsets[-1])
            # Get the frequent itemsets among those candidates
            frequent_itemsets = self._get_frequent_itemsets(candidates)

            # If there are no frequent itemsets we're done
            if not frequent_itemsets:
                break

            # Add them to the total list of frequent itemsets and start over
            self.freq_itemsets.append(frequent_itemsets)

        # Flatten the array and return every frequent itemset
        return [itemset for sublist in self.freq_itemsets for itemset in sublist]

    def _rules_from_itemset(self, initial_itemset, itemset, _seen=None):
        """ Recursive function which returns the rules where confidence >= min_confidence
        Starts with large itemset and recursively explores rules for subsets.

        `_seen` is internal bookkeeping: the antecedents already evaluated for
        this initial_itemset. The same antecedent is reachable through several
        parents; evaluating it once keeps the result free of duplicate rules.
        """
        if _seen is None:
            _seen = set()
        rules = []
        k = len(itemset)
        support = self._calculate_support(initial_itemset)
        # Get all combinations of sub-itemsets of size k - 1 from itemset
        # E.g [1,2,3] => [[1,2],[1,3],[2,3]]
        for subset in itertools.combinations(itemset, k - 1):
            if subset in _seen:
                continue
            _seen.add(subset)
            # itertools.combinations returns tuples => convert to list
            antecedent = list(subset)
            antecedent_support = self._calculate_support(antecedent)
            # Calculate the confidence as sup(A and B) / sup(B), if antecedent
            # is B in an itemset of A and B (rounded to two decimals)
            confidence = float("{0:.2f}".format(support / antecedent_support))
            if confidence >= self.min_conf:
                # The concequent is the initial_itemset except for antecedent
                concequent = [item for item in initial_itemset if item not in antecedent]
                # Create new rule (single item => store the item itself)
                rules.append(Rule(
                    antecedent=antecedent[0] if len(antecedent) == 1 else antecedent,
                    concequent=concequent[0] if len(concequent) == 1 else concequent,
                    confidence=confidence,
                    support=support))

                # If there are subsets that could result in rules
                # recursively add rules from subsets
                if k - 1 > 1:
                    rules += self._rules_from_itemset(initial_itemset, antecedent, _seen)
        return rules

    def generate_rules(self, transactions):
        """ Return the rules (confidence >= min_conf) of all frequent itemsets """
        self.transactions = transactions
        frequent_itemsets = self.find_frequent_itemsets(transactions)
        # Only consider itemsets of size >= 2 items
        frequent_itemsets = [itemset for itemset in frequent_itemsets
                             if isinstance(itemset, list)]
        rules = []
        for itemset in frequent_itemsets:
            rules += self._rules_from_itemset(itemset, itemset)
        return rules
class DBSCAN():
    """A density based clustering method that expands clusters from
    samples that have more neighbors within a radius specified by eps
    than the value min_samples.

    Parameters:
    -----------
    eps: float
        The radius within which samples are considered neighbors
    min_samples: int
        The number of neighbors required for the sample to be a core point.
    """
    def __init__(self, eps=1, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples

    def _get_neighbors(self, sample_i):
        """ Return an array with the indexes (into self.X) of the neighbors of
        sample_i. A sample_2 is considered a neighbor of sample_1 if the
        euclidean distance between them is smaller than epsilon. The sample
        itself is never part of the result. """
        distances = np.linalg.norm(self.X - self.X[sample_i], axis=1)
        is_neighbor = distances < self.eps
        # Mask the sample itself out instead of filtering self.X first, so the
        # returned positions are real row indexes of self.X
        is_neighbor[sample_i] = False
        return np.flatnonzero(is_neighbor)

    def _expand_cluster(self, sample_i, neighbors):
        """ Expand the cluster from core point sample_i until the border of the
        dense area is reached (density determined by eps and min_samples).
        Returns the list of sample indexes in the cluster.

        Uses an explicit stack rather than recursion so that large clusters
        cannot exceed the interpreter's recursion limit. """
        cluster = [sample_i]
        pending = [int(n) for n in neighbors]
        while pending:
            neighbor_i = pending.pop()
            if neighbor_i in self.visited_samples:
                continue
            self.visited_samples.add(neighbor_i)
            cluster.append(neighbor_i)
            # Fetch the sample's distant neighbors (neighbors of neighbor)
            self.neighbors[neighbor_i] = self._get_neighbors(neighbor_i)
            # A neighbor with at least min_samples neighbors is a core point
            # => keep expanding from it. Otherwise it is a border point and
            # only the point itself belongs to the cluster.
            if len(self.neighbors[neighbor_i]) >= self.min_samples:
                pending.extend(int(n) for n in self.neighbors[neighbor_i])
        return cluster

    def _get_cluster_labels(self):
        """ Return the samples labels as the index of the cluster in which they are
        contained """
        # Set default value to number of clusters
        # Will make sure all outliers have same cluster label
        labels = np.full(shape=self.X.shape[0], fill_value=len(self.clusters))
        for cluster_i, cluster in enumerate(self.clusters):
            for sample_i in cluster:
                labels[sample_i] = cluster_i
        return labels

    # DBSCAN
    def predict(self, X):
        """ Cluster the samples (rows) of the 2-D array X and return one label
        per sample. Outliers all get the label len(self.clusters). """
        self.X = X
        self.clusters = []
        self.visited_samples = set()
        self.neighbors = {}
        n_samples = np.shape(self.X)[0]
        # Iterate through samples and expand clusters from them
        # if they have more neighbors than self.min_samples
        for sample_i in range(n_samples):
            if sample_i in self.visited_samples:
                continue
            self.neighbors[sample_i] = self._get_neighbors(sample_i)
            if len(self.neighbors[sample_i]) >= self.min_samples:
                # If core point => mark as visited
                self.visited_samples.add(sample_i)
                # Sample has more neighbors than self.min_samples => expand
                # cluster from sample
                new_cluster = self._expand_cluster(
                    sample_i, self.neighbors[sample_i])
                # Add cluster to list of clusters
                self.clusters.append(new_cluster)

        # Get the resulting cluster labels
        return self._get_cluster_labels()
def build_generator(self, optimizer, loss_function):
    """ Build the generator network.

    A dense layer maps a latent vector of length self.latent_dim to
    128 * 7 * 7 values, which are reshaped to (128, 7, 7) and passed
    through two UpSampling2D + Conv2D stages and a final single-filter
    Conv2D with tanh activation.

    Returns the assembled NeuralNetwork.
    """
    model = NeuralNetwork(optimizer=optimizer, loss=loss_function)

    # Use the configured latent size rather than a literal 100 so the
    # generator follows self.latent_dim (set to 100 in __init__).
    # NOTE(review): train() and save_imgs() still sample some noise with a
    # literal 100 - keep them in sync if latent_dim is ever changed.
    model.add(Dense(128 * 7 * 7, input_shape=(self.latent_dim,)))
    model.add(Activation('leaky_relu'))
    model.add(Reshape((128, 7, 7)))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(128, filter_shape=(3,3), padding='same'))
    model.add(Activation("leaky_relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(64, filter_shape=(3,3), padding='same'))
    model.add(Activation("leaky_relu"))
    model.add(BatchNormalization(momentum=0.8))
    # Single output filter followed by tanh
    model.add(Conv2D(1, filter_shape=(3,3), padding='same'))
    model.add(Activation("tanh"))

    return model
= mnist.data.reshape((-1,) + self.img_shape) y = mnist.target # Rescale -1 to 1 X = (X.astype(np.float32) - 127.5) / 127.5 half_batch = int(batch_size / 2) for epoch in range(epochs): # --------------------- # Train Discriminator # --------------------- self.discriminator.set_trainable(True) # Select a random half batch of images idx = np.random.randint(0, X.shape[0], half_batch) imgs = X[idx] # Sample noise to use as generator input noise = np.random.normal(0, 1, (half_batch, 100)) # Generate a half batch of images gen_imgs = self.generator.predict(noise) valid = np.concatenate((np.ones((half_batch, 1)), np.zeros((half_batch, 1))), axis=1) fake = np.concatenate((np.zeros((half_batch, 1)), np.ones((half_batch, 1))), axis=1) # Train the discriminator d_loss_real, d_acc_real = self.discriminator.train_on_batch(imgs, valid) d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake) d_loss = 0.5 * (d_loss_real + d_loss_fake) d_acc = 0.5 * (d_acc_real + d_acc_fake) # --------------------- # Train Generator # --------------------- # We only want to train the generator for the combined model self.discriminator.set_trainable(False) # Sample noise and use as generator input noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) # The generator wants the discriminator to label the generated samples as valid valid = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))), axis=1) # Train the generator g_loss, g_acc = self.combined.train_on_batch(noise, valid) # Display the progress print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" % (epoch, d_loss, 100*d_acc, g_loss, 100*g_acc)) # If at save interval => save generated image samples if epoch % save_interval == 0: self.save_imgs(epoch) def save_imgs(self, epoch): r, c = 5, 5 noise = np.random.normal(0, 1, (r * c, 100)) gen_imgs = self.generator.predict(noise) # Rescale images 0 - 1 (from -1 to 1) gen_imgs = 0.5 * (gen_imgs + 1) fig, axs = plt.subplots(r, c) plt.suptitle("Deep 
Convolutional Generative Adversarial Network") cnt = 0 for i in range(r): for j in range(c): axs[i,j].imshow(gen_imgs[cnt,0,:,:], cmap='gray') axs[i,j].axis('off') cnt += 1 fig.savefig("mnist_%d.png" % epoch) plt.close() if __name__ == '__main__': dcgan = DCGAN() dcgan.train(epochs=200000, batch_size=64, save_interval=50) ================================================ FILE: mlfromscratch/unsupervised_learning/fp_growth.py ================================================ from __future__ import division, print_function import numpy as np import itertools class FPTreeNode(): def __init__(self, item=None, support=1): # 'Value' of the item self.item = item # Number of times the item occurs in a # transaction self.support = support # Child nodes in the FP Growth Tree self.children = {} class FPGrowth(): """A method for determining frequent itemsets in a transactional database. This is done by building a so called FP Growth tree, which can then be mined to collect the frequent itemsets. More effective than Apriori for large transactional databases. Parameters: ----------- min_sup: float The minimum fraction of transactions an itemets needs to occur in to be deemed frequent """ def __init__(self, min_sup=0.3): self.min_sup = min_sup # The root of the initial FP Growth Tree self.tree_root = None # Prefixes of itemsets in the FP Growth Tree self.prefixes = {} self.frequent_itemsets = [] # Count the number of transactions that contains item. def _calculate_support(self, item, transactions): count = 0 for transaction in transactions: if item in transaction: count += 1 support = count return support def _get_frequent_items(self, transactions): """ Returns a set of frequent items. An item is determined to be frequent if there are atleast min_sup transactions that contains it. 
""" # Get all unique items in the transactions unique_items = set( item for transaction in transactions for item in transaction) items = [] for item in unique_items: sup = self._calculate_support(item, transactions) if sup >= self.min_sup: items.append([item, sup]) # Sort by support - Highest to lowest items.sort(key=lambda item: item[1], reverse=True) frequent_items = [[el[0]] for el in items] # Only return the items return frequent_items def _insert_tree(self, node, children): """ Recursive method which adds nodes to the tree. """ if not children: return # Create new node as the first item in children list child_item = children[0] child = FPTreeNode(item=child_item) # If parent already contains item => increase the support if child_item in node.children: node.children[child.item].support += 1 else: node.children[child.item] = child # Execute _insert_tree on the rest of the children list # from the new node self._insert_tree(node.children[child.item], children[1:]) def _construct_tree(self, transactions, frequent_items=None): if not frequent_items: # Get frequent items sorted by support frequent_items = self._get_frequent_items(transactions) unique_frequent_items = list( set(item for itemset in frequent_items for item in itemset)) # Construct the root of the FP Growth tree root = FPTreeNode() for transaction in transactions: # Remove items that are not frequent according to # unique_frequent_items transaction = [item for item in transaction if item in unique_frequent_items] transaction.sort(key=lambda item: frequent_items.index([item])) self._insert_tree(root, transaction) return root def print_tree(self, node=None, indent_times=0): """ Recursive method which prints the FP Growth Tree """ if not node: node = self.tree_root indent = " " * indent_times print ("%s%s:%s" % (indent, node.item, node.support)) for child_key in node.children: child = node.children[child_key] self.print_tree(child, indent_times + 1) def _is_prefix(self, itemset, node): """ Makes sure that 
the first item in itemset is a child of node and that every following item in itemset is reachable via that path """ for item in itemset: if not item in node.children: return False node = node.children[item] return True def _determine_prefixes(self, itemset, node, prefixes=None): """ Recursive method that adds prefixes to the itemset by traversing the FP Growth Tree""" if not prefixes: prefixes = [] # If the current node is a prefix to the itemset # add the current prefixes value as prefix to the itemset if self._is_prefix(itemset, node): itemset_key = self._get_itemset_key(itemset) if not itemset_key in self.prefixes: self.prefixes[itemset_key] = [] self.prefixes[itemset_key] += [{"prefix": prefixes, "support": node.children[itemset[0]].support}] for child_key in node.children: child = node.children[child_key] # Recursive call with child as new node. Add the child item as potential # prefix. self._determine_prefixes(itemset, child, prefixes + [child.item]) def _get_itemset_key(self, itemset): """ Determines the look of the hashmap key for self.prefixes List of more strings than one gets joined by '-' """ if len(itemset) > 1: itemset_key = "-".join(itemset) else: itemset_key = str(itemset[0]) return itemset_key def _determine_frequent_itemsets(self, conditional_database, suffix): # Calculate new frequent items from the conditional database # of suffix frequent_items = self._get_frequent_items(conditional_database) cond_tree = None if suffix: cond_tree = self._construct_tree(conditional_database, frequent_items) # Output new frequent itemset as the suffix added to the frequent # items self.frequent_itemsets += [el + suffix for el in frequent_items] # Find larger frequent itemset by finding prefixes # of the frequent items in the FP Growth Tree for the conditional # database. 
class GaussianMixtureModel():
    """A probabilistic clustering method for determining groupings among
    data samples.

    Parameters:
    -----------
    k: int
        The number of clusters the algorithm will form.
    max_iterations: int
        The number of iterations the algorithm will run for if it does
        not converge before that.
    tolerance: float
        If the difference of the results from one iteration to the next is
        smaller than this value we will say that the algorithm has
        converged.
    """
    def __init__(self, k=2, max_iterations=2000, tolerance=1e-8):
        self.k = k
        self.parameters = []
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        self.responsibilities = []
        self.sample_assignments = None
        self.responsibility = None

    def _init_random_gaussians(self, X):
        """ Initialize the k gaussians: uniform priors, a randomly picked
        sample as mean and the covariance of the whole dataset """
        n_samples = np.shape(X)[0]
        self.priors = (1.0 / self.k) * np.ones(self.k)
        # Start from a clean list so that calling predict() more than once
        # does not keep (and silently reuse) components of an earlier run.
        self.parameters = []
        # The dataset covariance is the same for every component
        covariance = calculate_covariance_matrix(X)
        for _ in range(self.k):
            params = {}
            params["mean"] = X[np.random.choice(range(n_samples))]
            params["cov"] = np.array(covariance)
            self.parameters.append(params)

    def multivariate_gaussian(self, X, params):
        """ Likelihood of every sample in X under the gaussian described by
        params["mean"] and params["cov"]. Returns one value per sample. """
        X = np.asarray(X)
        n_features = np.shape(X)[1]
        mean = params["mean"]
        covar = params["cov"]
        # Determinant and pseudo-inverse depend only on the covariance, so
        # compute them once instead of once per sample.
        determinant = np.linalg.det(covar)
        precision = np.linalg.pinv(covar)
        coeff = 1.0 / (math.pow(2.0 * math.pi, n_features / 2.0)
                       * math.sqrt(determinant))
        centered = X - mean
        # Row-wise (x - mean)^T * pinv(cov) * (x - mean)
        mahalanobis = np.sum(centered.dot(precision) * centered, axis=1)
        return coeff * np.exp(-0.5 * mahalanobis)

    def _get_likelihoods(self, X):
        """ Calculate the likelihood over all samples, one column per
        gaussian """
        n_samples = np.shape(X)[0]
        likelihoods = np.zeros((n_samples, self.k))
        for i in range(self.k):
            likelihoods[:, i] = self.multivariate_gaussian(
                X, self.parameters[i])
        return likelihoods

    def _expectation(self, X):
        """ Calculate the responsibility """
        # Calculate probabilities of X belonging to the different clusters
        weighted_likelihoods = self._get_likelihoods(X) * self.priors
        sum_likelihoods = np.expand_dims(
            np.sum(weighted_likelihoods, axis=1), axis=1)
        # Determine responsibility as P(X|y)*P(y)/P(X)
        self.responsibility = weighted_likelihoods / sum_likelihoods
        # Assign samples to cluster that has largest probability
        self.sample_assignments = self.responsibility.argmax(axis=1)
        # Save value for convergence check
        self.responsibilities.append(np.max(self.responsibility, axis=1))

    def _maximization(self, X):
        """ Update the parameters and priors """
        # Iterate through clusters and recalculate mean and covariance
        for i in range(self.k):
            resp = np.expand_dims(self.responsibility[:, i], axis=1)
            mean = (resp * X).sum(axis=0) / resp.sum()
            covariance = (X - mean).T.dot((X - mean) * resp) / resp.sum()
            self.parameters[i]["mean"] = mean
            self.parameters[i]["cov"] = covariance

        # Update weights
        n_samples = np.shape(X)[0]
        self.priors = self.responsibility.sum(axis=0) / n_samples

    def _converged(self, X):
        """ Convergence if the norm of the change in the per-sample maximum
        responsibility between the two latest E-steps is <= tolerance """
        if len(self.responsibilities) < 2:
            return False
        diff = np.linalg.norm(
            self.responsibilities[-1] - self.responsibilities[-2])
        return diff <= self.tolerance

    def predict(self, X):
        """ Run GMM and return the cluster indices """
        # Forget the convergence history of any earlier run
        self.responsibilities = []

        # Initialize the gaussians randomly
        self._init_random_gaussians(X)

        # Run EM until convergence or for max iterations
        for _ in range(self.max_iterations):
            self._expectation(X)    # E-step
            self._maximization(X)   # M-step

            # Check convergence
            if self._converged(X):
                break

        # Make new assignments and return them
        self._expectation(X)
        return self.sample_assignments
def build_discriminator(self, optimizer, loss_function):
    """ Assemble the discriminator: two Dense + leaky ReLU + Dropout(0.5)
    stages (512 then 256 units) on an input of length self.img_dim,
    followed by a two-unit Dense layer with softmax activation.

    Returns the assembled NeuralNetwork.
    """
    discriminator = NeuralNetwork(optimizer=optimizer, loss=loss_function)

    # (units, extra Dense kwargs) per hidden stage; only the first layer
    # declares the input shape
    hidden_stages = (
        (512, {"input_shape": (self.img_dim,)}),
        (256, {}),
    )
    for n_units, dense_kwargs in hidden_stages:
        discriminator.add(Dense(n_units, **dense_kwargs))
        discriminator.add(Activation('leaky_relu'))
        discriminator.add(Dropout(0.5))

    # Two outputs through a softmax
    discriminator.add(Dense(2))
    discriminator.add(Activation('softmax'))

    return discriminator
self.discriminator.set_trainable(True) # Select a random half batch of images idx = np.random.randint(0, X.shape[0], half_batch) imgs = X[idx] # Sample noise to use as generator input noise = np.random.normal(0, 1, (half_batch, self.latent_dim)) # Generate a half batch of images gen_imgs = self.generator.predict(noise) # Valid = [1, 0], Fake = [0, 1] valid = np.concatenate((np.ones((half_batch, 1)), np.zeros((half_batch, 1))), axis=1) fake = np.concatenate((np.zeros((half_batch, 1)), np.ones((half_batch, 1))), axis=1) # Train the discriminator d_loss_real, d_acc_real = self.discriminator.train_on_batch(imgs, valid) d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake) d_loss = 0.5 * (d_loss_real + d_loss_fake) d_acc = 0.5 * (d_acc_real + d_acc_fake) # --------------------- # Train Generator # --------------------- # We only want to train the generator for the combined model self.discriminator.set_trainable(False) # Sample noise and use as generator input noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) # The generator wants the discriminator to label the generated samples as valid valid = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))), axis=1) # Train the generator g_loss, g_acc = self.combined.train_on_batch(noise, valid) # Display the progress print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f, acc: %.2f%%]" % (epoch, d_loss, 100*d_acc, g_loss, 100*g_acc)) # If at save interval => save generated image samples if epoch % save_interval == 0: self.save_imgs(epoch) def save_imgs(self, epoch): r, c = 5, 5 # Grid size noise = np.random.normal(0, 1, (r * c, self.latent_dim)) # Generate images and reshape to image shape gen_imgs = self.generator.predict(noise).reshape((-1, self.img_rows, self.img_cols)) # Rescale images 0 - 1 gen_imgs = 0.5 * gen_imgs + 0.5 fig, axs = plt.subplots(r, c) plt.suptitle("Generative Adversarial Network") cnt = 0 for i in range(r): for j in range(c): axs[i,j].imshow(gen_imgs[cnt,:,:], 
cmap='gray') axs[i,j].axis('off') cnt += 1 fig.savefig("mnist_%d.png" % epoch) plt.close() if __name__ == '__main__': gan = GAN() gan.train(n_epochs=200000, batch_size=64, save_interval=400) ================================================ FILE: mlfromscratch/unsupervised_learning/genetic_algorithm.py ================================================ from __future__ import print_function, division import string import numpy as np class GeneticAlgorithm(): """An implementation of a Genetic Algorithm which will try to produce the user specified target string. Parameters: ----------- target_string: string The string which the GA should try to produce. population_size: int The number of individuals (possible solutions) in the population. mutation_rate: float The rate (or probability) of which the alleles (chars in this case) should be randomly changed. """ def __init__(self, target_string, population_size, mutation_rate): self.target = target_string self.population_size = population_size self.mutation_rate = mutation_rate self.letters = [" "] + list(string.ascii_letters) def _initialize(self): """ Initialize population with random strings """ self.population = [] for _ in range(self.population_size): # Select random letters as new individual individual = "".join(np.random.choice(self.letters, size=len(self.target))) self.population.append(individual) def _calculate_fitness(self): """ Calculates the fitness of each individual in the population """ population_fitness = [] for individual in self.population: # Calculate loss as the alphabetical distance between # the characters in the individual and the target string loss = 0 for i in range(len(individual)): letter_i1 = self.letters.index(individual[i]) letter_i2 = self.letters.index(self.target[i]) loss += abs(letter_i1 - letter_i2) fitness = 1 / (loss + 1e-6) population_fitness.append(fitness) return population_fitness def _mutate(self, individual): """ Randomly change the individual's characters with probability 
self.mutation_rate """ individual = list(individual) for j in range(len(individual)): # Make change with probability mutation_rate if np.random.random() < self.mutation_rate: individual[j] = np.random.choice(self.letters) # Return mutated individual as string return "".join(individual) def _crossover(self, parent1, parent2): """ Create children from parents by crossover """ # Select random crossover point cross_i = np.random.randint(0, len(parent1)) child1 = parent1[:cross_i] + parent2[cross_i:] child2 = parent2[:cross_i] + parent1[cross_i:] return child1, child2 def run(self, iterations): # Initialize new population self._initialize() for epoch in range(iterations): population_fitness = self._calculate_fitness() fittest_individual = self.population[np.argmax(population_fitness)] highest_fitness = max(population_fitness) # If we have found individual which matches the target => Done if fittest_individual == self.target: break # Set the probability that the individual should be selected as a parent # proportionate to the individual's fitness. 
class KMeans():
    """A simple clustering method that forms k clusters by iteratively
    reassigning samples to the closest centroids and after that moves the
    centroids to the center of the new formed clusters.

    Parameters:
    -----------
    k: int
        The number of clusters the algorithm will form.
    max_iterations: int
        The number of iterations the algorithm will run for if it does
        not converge before that.
    """
    def __init__(self, k=2, max_iterations=500):
        self.k = k
        self.max_iterations = max_iterations

    def _init_random_centroids(self, X):
        """ Initialize the centroids as k random samples of X """
        n_samples = np.shape(X)[0]
        # Draw distinct samples whenever possible: two centroids starting
        # on the same sample would leave one of the clusters empty.
        chosen = np.random.choice(
            n_samples, self.k, replace=self.k > n_samples)
        return np.array(np.asarray(X)[chosen], dtype=float)

    def _closest_centroid(self, sample, centroids):
        """ Return the index of the closest centroid to the sample
        (the lowest index wins on ties) """
        distances = np.linalg.norm(np.asarray(centroids) - sample, axis=1)
        return int(np.argmin(distances))

    def _create_clusters(self, centroids, X):
        """ Assign the samples to the closest centroids to create clusters.
        Returns k lists of sample indexes. """
        clusters = [[] for _ in range(self.k)]
        for sample_i, sample in enumerate(X):
            centroid_i = self._closest_centroid(sample, centroids)
            clusters[centroid_i].append(sample_i)
        return clusters

    def _calculate_centroids(self, clusters, X, prev_centroids=None):
        """ Calculate new centroids as the means of the samples in each
        cluster.

        A cluster without samples has no mean: its centroid is carried over
        from prev_centroids when given, and is NaN otherwise. """
        n_features = np.shape(X)[1]
        centroids = np.zeros((self.k, n_features))
        for i, cluster in enumerate(clusters):
            if len(cluster) > 0:
                centroids[i] = np.mean(X[cluster], axis=0)
            elif prev_centroids is not None:
                centroids[i] = prev_centroids[i]
            else:
                centroids[i] = np.nan
        return centroids

    def _get_cluster_labels(self, clusters, X):
        """ Classify samples as the index of their clusters """
        # One prediction for each sample
        y_pred = np.zeros(np.shape(X)[0])
        for cluster_i, cluster in enumerate(clusters):
            for sample_i in cluster:
                y_pred[sample_i] = cluster_i
        return y_pred

    def predict(self, X):
        """ Do K-Means clustering and return cluster indices """
        X = np.asarray(X)

        # Initialize centroids as k random samples from X
        centroids = self._init_random_centroids(X)

        clusters = None
        # Iterate until convergence or for max iterations
        for _ in range(self.max_iterations):
            # Assign samples to closest centroids (create clusters)
            clusters = self._create_clusters(centroids, X)
            # Save current centroids for convergence check
            prev_centroids = centroids
            # Calculate new centroids from the clusters
            centroids = self._calculate_centroids(
                clusters, X, prev_centroids)
            # If no centroids have changed => convergence
            diff = centroids - prev_centroids
            if not diff.any():
                break

        # With max_iterations == 0 the loop never assigned any sample
        if clusters is None:
            clusters = self._create_clusters(centroids, X)

        return self._get_cluster_labels(clusters, X)
""" def __init__(self, k=2): self.k = k def _init_random_medoids(self, X): """ Initialize the medoids as random samples """ n_samples, n_features = np.shape(X) medoids = np.zeros((self.k, n_features)) for i in range(self.k): medoid = X[np.random.choice(range(n_samples))] medoids[i] = medoid return medoids def _closest_medoid(self, sample, medoids): """ Return the index of the closest medoid to the sample """ closest_i = None closest_distance = float("inf") for i, medoid in enumerate(medoids): distance = euclidean_distance(sample, medoid) if distance < closest_distance: closest_i = i closest_distance = distance return closest_i def _create_clusters(self, X, medoids): """ Assign the samples to the closest medoids to create clusters """ clusters = [[] for _ in range(self.k)] for sample_i, sample in enumerate(X): medoid_i = self._closest_medoid(sample, medoids) clusters[medoid_i].append(sample_i) return clusters def _calculate_cost(self, X, clusters, medoids): """ Calculate the cost (total distance between samples and their medoids) """ cost = 0 # For each cluster for i, cluster in enumerate(clusters): medoid = medoids[i] for sample_i in cluster: # Add distance between sample and medoid as cost cost += euclidean_distance(X[sample_i], medoid) return cost def _get_non_medoids(self, X, medoids): """ Returns a list of all samples that are not currently medoids """ non_medoids = [] for sample in X: if not sample in medoids: non_medoids.append(sample) return non_medoids def _get_cluster_labels(self, clusters, X): """ Classify samples as the index of their clusters """ # One prediction for each sample y_pred = np.zeros(np.shape(X)[0]) for cluster_i in range(len(clusters)): cluster = clusters[cluster_i] for sample_i in cluster: y_pred[sample_i] = cluster_i return y_pred def predict(self, X): """ Do Partitioning Around Medoids and return the cluster labels """ # Initialize medoids randomly medoids = self._init_random_medoids(X) # Assign samples to closest medoids clusters = 
self._create_clusters(X, medoids) # Calculate the initial cost (total distance between samples and # corresponding medoids) cost = self._calculate_cost(X, clusters, medoids) # Iterate until we no longer have a cheaper cost while True: best_medoids = medoids lowest_cost = cost for medoid in medoids: # Get all non-medoid samples non_medoids = self._get_non_medoids(X, medoids) # Calculate the cost when swapping medoid and samples for sample in non_medoids: # Swap sample with the medoid new_medoids = medoids.copy() new_medoids[medoids == medoid] = sample # Assign samples to new medoids new_clusters = self._create_clusters(X, new_medoids) # Calculate the cost with the new set of medoids new_cost = self._calculate_cost( X, new_clusters, new_medoids) # If the swap gives us a lower cost we save the medoids and cost if new_cost < lowest_cost: lowest_cost = new_cost best_medoids = new_medoids # If there was a swap that resultet in a lower cost we save the # resulting medoids from the best swap and the new cost if lowest_cost < cost: cost = lowest_cost medoids = best_medoids # Else finished else: break final_clusters = self._create_clusters(X, medoids) # Return the samples cluster indices as labels return self._get_cluster_labels(final_clusters, X) ================================================ FILE: mlfromscratch/unsupervised_learning/principal_component_analysis.py ================================================ from __future__ import print_function, division import numpy as np from mlfromscratch.utils import calculate_covariance_matrix class PCA(): """A method for doing dimensionality reduction by transforming the feature space to a lower dimensionality, removing correlation between features and maximizing the variance along each feature axis. This class is also used throughout the project to plot data. 
def transform(self, X, n_components):
    """ Project the dataset onto its first `n_components` principal
    components and return the transformed dataset.

    Parameters:
    -----------
    X: array of shape (n_samples, n_features)
    n_components: int
        Number of leading principal components (largest eigenvalues of the
        covariance matrix of X) to keep.

    Returns:
    --------
    array of shape (n_samples, n_components): X.dot(selected eigenvectors).
    X itself is projected as given (it is not mean-centered here).
    """
    covariance_matrix = calculate_covariance_matrix(X)

    # The covariance matrix is symmetric, so use the symmetric solver:
    # it always returns real eigenvalues/eigenvectors, whereas the general
    # `eig` can produce complex output from round-off.
    # (eigenvectors[:, i] corresponds to eigenvalues[i])
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Order from largest to smallest eigenvalue and keep the first
    # n_components
    idx = eigenvalues.argsort()[::-1][:n_components]
    eigenvectors = eigenvectors[:, idx]

    # Project the data onto principal components
    return X.dot(eigenvectors)
def fit(self, X, y=None):
    '''Contrastive Divergence training procedure

    Re-initializes the weights, then for every iteration runs one
    positive / negative phase per mini-batch and updates W, h0 and v0.
    After each iteration the mean batch error is appended to
    `self.training_errors` and the reconstruction of a random batch of
    training rows to `self.training_reconstructions`. `y` is not used.
    '''
    self._initialize_weights(X)

    self.training_errors = []
    self.training_reconstructions = []
    n_samples = X.shape[0]

    for _ in self.progressbar(range(self.n_iterations)):
        epoch_errors = []
        for v_data in batch_iterator(X, batch_size=self.batch_size):
            # Positive phase: hidden probabilities driven by the data
            h_prob_data = sigmoid(v_data.dot(self.W) + self.h0)
            h_state = self._sample(h_prob_data)

            # Negative phase: sampled reconstruction of the visible
            # units, then the hidden probabilities it induces
            v_model = self._sample(
                sigmoid(h_state.dot(self.W.T) + self.v0))
            h_prob_model = sigmoid(v_model.dot(self.W) + self.h0)

            # Data statistics minus reconstruction statistics
            delta_W = v_data.T.dot(h_prob_data) - v_model.T.dot(h_prob_model)
            delta_h = h_prob_data.sum(axis=0) - h_prob_model.sum(axis=0)
            delta_v = v_data.sum(axis=0) - v_model.sum(axis=0)

            self.W += self.lr * delta_W
            self.h0 += self.lr * delta_h
            self.v0 += self.lr * delta_v

            epoch_errors.append(np.mean((v_data - v_model) ** 2))

        self.training_errors.append(np.mean(epoch_errors))
        # Reconstruct a batch of images from the training set
        picked = np.random.choice(range(n_samples), self.batch_size)
        self.training_reconstructions.append(self.reconstruct(X[picked]))
def shuffle_data(X, y, seed=None):
    """ Random shuffle of the samples in X and y.

    X and y are permuted with the same index order so rows stay paired.
    If `seed` is given (including 0) the global NumPy RNG is seeded first.
    """
    # Compare against None: 0 is a valid seed and must not be ignored
    if seed is not None:
        np.random.seed(seed)
    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)
    return X[idx], y[idx]


def batch_iterator(X, y=None, batch_size=64):
    """ Simple batch generator.

    Yields consecutive slices of at most `batch_size` rows of X; when y is
    given, yields (X_batch, y_batch) tuples instead. The last batch may be
    shorter.
    """
    n_samples = X.shape[0]
    for begin in np.arange(0, n_samples, batch_size):
        end = min(begin + batch_size, n_samples)
        if y is not None:
            yield X[begin:end], y[begin:end]
        else:
            yield X[begin:end]


def divide_on_feature(X, feature_i, threshold):
    """ Divide dataset based on if sample value on feature index is larger
    than the given threshold.

    Numeric thresholds (Python or NumPy int/float) split on
    `sample[feature_i] >= threshold`; any other threshold splits on
    equality.

    Returns:
    --------
    A length-2 array-like `[X_1, X_2]` (samples that satisfy the test,
    samples that do not). When both halves have the same shape this is a
    regular stacked array; otherwise it is an object array holding the two
    halves, so `X_1, X_2 = divide_on_feature(...)` always works.
    """
    # np.integer / np.floating: thresholds taken from arrays (e.g. via
    # np.unique) are NumPy scalars, and np.int64 is not a Python int
    if isinstance(threshold, (int, float, np.integer, np.floating)):
        split_func = lambda sample: sample[feature_i] >= threshold
    else:
        split_func = lambda sample: sample[feature_i] == threshold

    X_1 = np.array([sample for sample in X if split_func(sample)])
    X_2 = np.array([sample for sample in X if not split_func(sample)])

    if X_1.shape == X_2.shape:
        return np.array([X_1, X_2])
    # Unequal halves cannot be stacked; current NumPy raises on implicit
    # ragged arrays, so build the object container explicitly
    halves = np.empty(2, dtype=object)
    halves[0] = X_1
    halves[1] = X_2
    return halves
def standardize(X):
    """ Standardize the dataset X.

    Returns a new float array in which every column with non-zero standard
    deviation has zero mean and unit variance. Columns with zero standard
    deviation are returned unchanged. The input array is not modified.
    """
    # Work on a float copy: writing into X itself would overwrite the
    # caller's data and truncate the result for integer inputs
    X_std = np.array(X, dtype=float)
    mean = X_std.mean(axis=0)
    std = X_std.std(axis=0)
    varying = std != 0
    X_std[:, varying] = (X_std[:, varying] - mean[varying]) / std[varying]
    return X_std


def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None):
    """ Split the data into train and test sets.

    Parameters:
    -----------
    test_size: float
        Fraction of the samples placed in the test set (taken from the end
        of the, optionally shuffled, data).
    shuffle: bool
        Shuffle X and y together before splitting.
    seed: passed on to shuffle_data.

    Returns:
    --------
    X_train, X_test, y_train, y_test
    """
    if shuffle:
        X, y = shuffle_data(X, y, seed)
    # Split the training data from test data in the ratio specified in
    # test_size
    split_i = len(y) - int(len(y) // (1 / test_size))
    X_train, X_test = X[:split_i], X[split_i:]
    y_train, y_test = y[:split_i], y[split_i:]

    return X_train, X_test, y_train, y_test


def k_fold_cross_validation_sets(X, y, k, shuffle=True):
    """ Split the data into k sets of training / test data.

    The first `len(y) - len(y) % k` samples are cut into k equal folds;
    fold i is the test data of set i and the remaining folds its training
    data. Samples left over from the division are added to the training
    data of the last set.

    Returns:
    --------
    Object array of shape (k, 4); row i holds
    [X_train, X_test, y_train, y_test] for set i.
    """
    if shuffle:
        X, y = shuffle_data(X, y)

    n_samples = len(y)
    n_left_overs = n_samples % k
    left_over_X = left_over_y = None
    if n_left_overs != 0:
        left_over_X, left_over_y = X[-n_left_overs:], y[-n_left_overs:]
        X, y = X[:-n_left_overs], y[:-n_left_overs]

    X_split = np.split(X, k)
    y_split = np.split(y, k)
    sets = []
    for i in range(k):
        X_test, y_test = X_split[i], y_split[i]
        X_train = np.concatenate(X_split[:i] + X_split[i + 1:], axis=0)
        y_train = np.concatenate(y_split[:i] + y_split[i + 1:], axis=0)
        sets.append([X_train, X_test, y_train, y_test])

    # Add left over samples to last set as training samples.
    # np.append returns a new array, so the result must be stored back.
    if n_left_overs != 0:
        sets[-1][0] = np.append(sets[-1][0], left_over_X, axis=0)
        sets[-1][2] = np.append(sets[-1][2], left_over_y, axis=0)

    # The four parts of a set have different shapes, so collect them in an
    # explicit object array instead of relying on ragged np.array(...)
    folds = np.empty((k, 4), dtype=object)
    for i, parts in enumerate(sets):
        for j, part in enumerate(parts):
            folds[i, j] = part
    return folds
def accuracy_score(y_true, y_pred):
    """ Compare y_true to y_pred and return the accuracy.

    Accepts NumPy arrays or plain Python sequences of equal length and
    returns the fraction of positions where the two agree (computed along
    axis 0).
    """
    # Convert first: `==` on two Python lists yields a single bool rather
    # than an element-wise comparison
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    accuracy = np.sum(y_true == y_pred, axis=0) / len(y_true)
    return accuracy
class Plot():
    """Matplotlib helpers for regression plots and PCA scatter plots."""

    def __init__(self):
        self.cmap = plt.get_cmap('viridis')

    def _transform(self, X, dim):
        """ Project X onto the `dim` eigenvectors of its covariance matrix
        that have the largest eigenvalues. """
        covariance = calculate_covariance_matrix(X)
        # Symmetric matrix -> symmetric solver (always real-valued output)
        eigenvalues, eigenvectors = np.linalg.eigh(covariance)
        # Sort eigenvalues and eigenvector by largest eigenvalues
        idx = eigenvalues.argsort()[::-1][:dim]
        eigenvectors = eigenvectors[:, idx]
        # Project the data onto principal components
        X_transformed = X.dot(eigenvectors)

        return X_transformed

    def plot_regression(self, lines, title, axis_labels=None, mse=None,
                        scatter=None, legend=None):
        """ Plot regression lines and, optionally, scatter data.

        Parameters:
        -----------
        lines: iterable of dicts with keys "x", "y", "width", "label" and
            optionally "color".
        title: str
        axis_labels: dict with keys "x" and "y", or None.
        mse: number shown under the title, or None to omit it.
        scatter: iterable of dicts with keys "x", "y", "color", "size",
            "label", or None.
        legend: dict with keys "type" ("lines" or "scatter") and "loc";
            defaults to {"type": "lines", "loc": "lower right"}.
        """
        if legend is None:
            legend = {"type": "lines", "loc": "lower right"}

        # Separate lists: handles and labels must not share one object
        scatter_plots = []
        scatter_labels = []
        if scatter:
            for s in scatter:
                scatter_plots.append(plt.scatter(
                    s["x"], s["y"], color=s["color"], s=s["size"]))
                scatter_labels.append(s["label"])

        for line in lines:
            # Colour comes from the line's own spec (None lets matplotlib
            # pick one), not from the last scatter entry
            plt.plot(line["x"], line["y"], color=line.get("color"),
                     linewidth=line["width"], label=line["label"])

        # `is not None` so that an MSE of exactly 0 is still displayed
        if mse is not None:
            plt.suptitle(title)
            plt.title("MSE: %.2f" % mse, fontsize=10)
        else:
            plt.title(title)

        if axis_labels:
            plt.xlabel(axis_labels["x"])
            plt.ylabel(axis_labels["y"])

        if legend["type"] == "lines":
            # Honour the requested location ("lower_left" is not a valid
            # matplotlib location string)
            plt.legend(loc=legend["loc"])
        elif legend["type"] == "scatter" and scatter:
            plt.legend(tuple(scatter_plots), tuple(scatter_labels),
                       loc=legend["loc"])

        plt.show()

    # Plot the dataset X and the corresponding labels y in 2D using PCA.
    def plot_in_2d(self, X, y=None, title=None, accuracy=None,
                   legend_labels=None):
        """ Scatter-plot X projected on its first two principal components,
        one colour per distinct label in y (a single colour when y is
        None). `accuracy` (a fraction) is shown as a percentage under the
        title when both are given. """
        X_transformed = self._transform(X, dim=2)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        class_distr = []

        if y is None:
            # No labels: treat every sample as belonging to one class
            y = np.zeros(len(x1), dtype=int)
        else:
            y = np.array(y).astype(int)

        labels = np.unique(y)
        class_colors = [self.cmap(i) for i in np.linspace(0, 1, len(labels))]

        # Plot the different class distributions
        for i, l in enumerate(labels):
            _x1 = x1[y == l]
            _x2 = x2[y == l]
            class_distr.append(plt.scatter(_x1, _x2, color=class_colors[i]))

        # Plot legend
        if legend_labels is not None:
            plt.legend(class_distr, legend_labels, loc=1)

        # Plot title
        if title:
            if accuracy is not None:
                perc = 100 * accuracy
                plt.suptitle(title)
                plt.title("Accuracy: %.1f%%" % perc, fontsize=10)
            else:
                plt.title(title)

        # Axis labels
        plt.xlabel('Principal Component 1')
        plt.ylabel('Principal Component 2')

        plt.show()

    # Plot the dataset X and the corresponding labels y in 3D using PCA.
    def plot_in_3d(self, X, y=None):
        """ Scatter-plot X projected on its first three principal
        components, coloured by y when given. """
        X_transformed = self._transform(X, dim=3)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        x3 = X_transformed[:, 2]
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(x1, x2, x3, c=y)
        plt.show()
[x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')] setup( name='mlfromscratch', version=__version__, description='Python implementations of some of the fundamental Machine Learning models and algorithms from scratch.', url='https://github.com/eriklindernoren/ML-From-Scratch', download_url='https://github.com/eriklindernoren/ML-From-Scratch/tarball/master', license='MIT', packages=find_packages(), include_package_data=True, author='Erik Linder-Noren', install_requires=install_requires, setup_requires=['numpy>=1.10', 'scipy>=0.17'], dependency_links=dependency_links, author_email='eriklindernoren@gmail.com' )