Full Code of ddbourgin/numpy-ml for AI

master b0359af5285f cached

194 files

1.3 MB

332.9k tokens

1357 symbols

1 requests

Download .txt

Showing preview only (1,344K chars total). Download the full file or copy to clipboard to get everything.

Repository: ddbourgin/numpy-ml
Branch: master
Commit: b0359af5285f
Files: 194
Total size: 1.3 MB

Directory structure:
gitextract_t47luwfk/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── a--bug-performance-issue.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .readthedocs.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── numpy_ml.bandits.bandits.rst
│   ├── numpy_ml.bandits.policies.rst
│   ├── numpy_ml.bandits.rst
│   ├── numpy_ml.bandits.trainer.rst
│   ├── numpy_ml.factorization.factors.rst
│   ├── numpy_ml.factorization.rst
│   ├── numpy_ml.gmm.gmm.rst
│   ├── numpy_ml.gmm.rst
│   ├── numpy_ml.hmm.MultinomialHMM.rst
│   ├── numpy_ml.hmm.rst
│   ├── numpy_ml.lda.lda.rst
│   ├── numpy_ml.lda.rst
│   ├── numpy_ml.lda.smoothed_lda.rst
│   ├── numpy_ml.linear_models.lm.rst
│   ├── numpy_ml.linear_models.rst
│   ├── numpy_ml.neural_nets.activations.rst
│   ├── numpy_ml.neural_nets.initializers.rst
│   ├── numpy_ml.neural_nets.layers.rst
│   ├── numpy_ml.neural_nets.losses.rst
│   ├── numpy_ml.neural_nets.models.rst
│   ├── numpy_ml.neural_nets.modules.rst
│   ├── numpy_ml.neural_nets.optimizers.rst
│   ├── numpy_ml.neural_nets.rst
│   ├── numpy_ml.neural_nets.schedulers.rst
│   ├── numpy_ml.neural_nets.utils.rst
│   ├── numpy_ml.neural_nets.wrappers.rst
│   ├── numpy_ml.ngram.additive.rst
│   ├── numpy_ml.ngram.goodturing.rst
│   ├── numpy_ml.ngram.mle.rst
│   ├── numpy_ml.ngram.rst
│   ├── numpy_ml.nonparametric.gp.rst
│   ├── numpy_ml.nonparametric.kernel_regression.rst
│   ├── numpy_ml.nonparametric.knn.rst
│   ├── numpy_ml.nonparametric.rst
│   ├── numpy_ml.preprocessing.dsp.rst
│   ├── numpy_ml.preprocessing.general.rst
│   ├── numpy_ml.preprocessing.nlp.rst
│   ├── numpy_ml.preprocessing.rst
│   ├── numpy_ml.rl_models.agents.rst
│   ├── numpy_ml.rl_models.rl_utils.rst
│   ├── numpy_ml.rl_models.rst
│   ├── numpy_ml.rl_models.trainer.rst
│   ├── numpy_ml.trees.dt.rst
│   ├── numpy_ml.trees.gbdt.rst
│   ├── numpy_ml.trees.losses.rst
│   ├── numpy_ml.trees.rf.rst
│   ├── numpy_ml.trees.rst
│   ├── numpy_ml.utils.data_structures.rst
│   ├── numpy_ml.utils.distance_metrics.rst
│   ├── numpy_ml.utils.graphs.rst
│   ├── numpy_ml.utils.kernels.rst
│   ├── numpy_ml.utils.rst
│   ├── numpy_ml.utils.testing.rst
│   ├── numpy_ml.utils.windows.rst
│   └── requirements.txt
├── numpy_ml/
│   ├── README.md
│   ├── __init__.py
│   ├── bandits/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── bandits.py
│   │   ├── policies.py
│   │   └── trainer.py
│   ├── factorization/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── factors.py
│   ├── gmm/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── gmm.py
│   ├── hmm/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── hmm.py
│   ├── lda/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── lda.py
│   │   └── lda_smoothed.py
│   ├── linear_models/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── bayesian_regression.py
│   │   ├── glm.py
│   │   ├── linear_regression.py
│   │   ├── logistic.py
│   │   ├── naive_bayes.py
│   │   └── ridge.py
│   ├── neural_nets/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── activations/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── activations.py
│   │   ├── initializers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── initializers.py
│   │   ├── layers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── layers.py
│   │   ├── losses/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── losses.py
│   │   ├── models/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── vae.py
│   │   │   ├── w2v.py
│   │   │   └── wgan_gp.py
│   │   ├── modules/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── modules.py
│   │   ├── optimizers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── optimizers.py
│   │   ├── schedulers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── schedulers.py
│   │   ├── utils/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   └── wrappers/
│   │       ├── README.md
│   │       ├── __init__.py
│   │       └── wrappers.py
│   ├── ngram/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── ngram.py
│   ├── nonparametric/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gp.py
│   │   ├── kernel_regression.py
│   │   └── knn.py
│   ├── plots/
│   │   ├── bandit_plots.py
│   │   ├── gmm_plots.py
│   │   ├── hmm_plots.py
│   │   ├── lda_plots.py
│   │   ├── lm_plots.py
│   │   ├── ngram_plots.py
│   │   ├── nn_activations_plots.py
│   │   ├── nn_schedulers_plots.py
│   │   ├── nonparametric_plots.py
│   │   ├── rl_plots.py
│   │   └── trees_plots.py
│   ├── preprocessing/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── dsp.py
│   │   ├── general.py
│   │   └── nlp.py
│   ├── rl_models/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── agents.py
│   │   ├── rl_utils.py
│   │   ├── tiles/
│   │   │   ├── __init__.py
│   │   │   └── tiles3.py
│   │   └── trainer.py
│   ├── tests/
│   │   ├── __init__.py
│   │   ├── nn_torch_models.py
│   │   ├── test_glm.py
│   │   ├── test_linear_regression.py
│   │   ├── test_naive_bayes.py
│   │   ├── test_ngram.py
│   │   ├── test_nn.py
│   │   ├── test_nn_activations.py
│   │   ├── test_nonparametric.py
│   │   ├── test_preprocessing.py
│   │   ├── test_trees.py
│   │   └── test_utils.py
│   ├── trees/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── dt.py
│   │   ├── gbdt.py
│   │   ├── losses.py
│   │   └── rf.py
│   └── utils/
│       ├── README.md
│       ├── __init__.py
│       ├── data_structures.py
│       ├── distance_metrics.py
│       ├── graphs.py
│       ├── kernels.py
│       ├── misc.py
│       ├── testing.py
│       └── windows.py
├── requirements-dev.txt
├── requirements-test.txt
├── requirements.txt
├── setup.py
└── tox.ini

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/ISSUE_TEMPLATE/a--bug-performance-issue.md
================================================
---
name: Bug/Performance Issue
about: Use this template for reporting a bug or a performance issue.
labels: bugfix
---

**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- Python version:
- NumPy version:

**Describe the current behavior**

**Describe the expected behavior**

**Code to reproduce the issue**
<!-- Provide a reproducible test case that is the bare minimum necessary to generate the problem. -->

**Other info / logs**
<!-- Include any logs or source code that would be helpful to diagnose the problem.
If including tracebacks, please include the full traceback. Large logs and files should be attached. -->


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
### All Submissions

* [ ] Is the code you are submitting your own work?
* [ ] Have you followed the [contributing guidelines](https://github.com/ddbourgin/numpy-ml/blob/master/CONTRIBUTING.md)?
* [ ] Have you checked to ensure there aren't other open [Pull Requests](https://github.com/ddbourgin/numpy-ml/pulls) for the same update/change?

### New Model Submissions

* [ ] Is the code you are submitting your own work?
* [ ] Did you properly attribute the authors of any code you referenced?
* [ ] Did you write unit tests for your new model?
* [ ] Does your submission pass the unit tests?
* [ ] Did you write documentation for your new model?
* [ ] Have you formatted your code using the [black](https://black.now.sh/) defaults?

### Changes to Existing Models

* [ ] Have you added an explanation of what your changes do and why you'd like us to include them?
* [ ] Have you written new tests for your changes, as applicable?
* [ ] Have you successfully run tests with your changes locally?


================================================
FILE: .gitignore
================================================
### OSX ###
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

### Python Patch ###
.venv/

### Vim ###
# Swap
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]

# Session
Session.vim

# Temporary
.netrwhist
*~

# Auto-generated tag files
tags

# Persistent undo
[._]*.un~

# No pdfs
*.pdf

# No TODOs ;-)
TODO

_build
_static


================================================
FILE: .readthedocs.yml
================================================
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Build documentation with MkDocs
#mkdocs:
#  configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF and ePub
formats:
    - htmlzip

# Optionally set the version of Python and requirements required to build your docs
python:
  version: 3.7
  install:
    - requirements: docs/requirements.txt


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# NumPy-ML Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
  advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
  address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at ddbourgin@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: CONTRIBUTING.md
================================================
## Contributing

Thank you for contributing to numpy-ml!

| <p align="center">⚠️ ⚠️ All PRs should reflect earnest attempts at implementing a model yourself. ⚠️⚠️ </p> It is fine to reference others' code. It is not fine to blindly copy without attribution. When in doubt, please ask. |
| --- |

### General guidelines
1. Please include a clear list of what you've done
2. For pull requests, please make sure all commits are [*atomic*](https://en.wikipedia.org/wiki/Atomic_commit) (i.e., one feature per commit)
3. If you're submitting a new model / feature / module, **please include proper documentation and unit tests.**
    - See the `test_*.py` files in the `numpy_ml/tests/` directory for examples of unit tests.
    - Documentation is loosely based on the [NumPy docstring style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html). When in doubt, refer to existing examples.
4. Please format your code using the [black](https://github.com/python/black) defaults. You can use this [online formatter](https://black.now.sh/).

### Specific guidelines
#### I have a new model / model component to contribute
 Awesome - create a [pull request](https://github.com/ddbourgin/numpy-ml/pulls)! When preparing your PR, please include a brief description of the model, the canonical reference(s) in the literature, and, most importantly, unit tests against an existing implementation!
  - Refer to the `test_*.py` files in the `numpy_ml/tests/` directory for examples.

#### I have a major new enhancement / adjustment that will affect multiple models
 Please post an [issue](https://github.com/ddbourgin/numpy-ml/issues) with your proposal before you begin working on it. When outlining your proposal, please include as much detail about your intended changes as possible.

#### I found a bug
 If there isn't already an [open issue](https://github.com/ddbourgin/numpy-ml/issues), please start one! When creating your issue, include:
  1. A title and clear description
  2. As much relevant information as possible
  3. A code sample demonstrating the expected behavior that is not occurring

#### I fixed a bug
 Thank you! Please open a new [pull request](https://github.com/ddbourgin/numpy-ml/pulls) with the patch. When doing so, ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.


================================================
FILE: LICENSE
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.


================================================
FILE: MANIFEST.in
================================================
# Extra non-Python files to include in the source distribution:
# the README, every requirements*.txt file, and the documentation
# sources (.rst) together with their PNG images.
include README.md
include requirements*.txt
include docs/*.rst
include docs/img/*.png


================================================
FILE: README.md
================================================
# numpy-ml
Ever wish you had an inefficient but somewhat legible collection of machine
learning algorithms implemented exclusively in NumPy? No?

## Installation

### For rapid experimentation
To use this code as a starting point for ML prototyping / experimentation, just clone the repository, create a new [virtualenv](https://pypi.org/project/virtualenv/), and start hacking:

```sh
$ git clone https://github.com/ddbourgin/numpy-ml.git
$ cd numpy-ml && virtualenv npml && source npml/bin/activate
$ pip3 install -r requirements-dev.txt
```

### As a package
If you don't plan to modify the source, you can also install numpy-ml as a
Python package: `pip3 install -U numpy_ml`.

The reinforcement learning agents train on environments defined in the [OpenAI
gym](https://github.com/openai/gym). To install these alongside numpy-ml, you
can use `pip3 install -U 'numpy_ml[rl]'`.

## Documentation
For more details on the available models, see the [project documentation](https://numpy-ml.readthedocs.io/).

## Available models
<details>
  <summary>Click to expand!</summary>

1. **Gaussian mixture model**
    - EM training

2. **Hidden Markov model**
    - Viterbi decoding
    - Likelihood computation
    - MLE parameter estimation via Baum-Welch/forward-backward algorithm

3. **Latent Dirichlet allocation** (topic model)
    - Standard model with MLE parameter estimation via variational EM
    - Smoothed model with MAP parameter estimation via MCMC

4. **Neural networks**
    * Layers / Layer-wise ops
        - Add
        - Flatten
        - Multiply
        - Softmax
        - Fully-connected/Dense
        - Sparse evolutionary connections
        - LSTM
        - Elman-style RNN
        - Max + average pooling
        - Dot-product attention
        - Embedding layer
        - Restricted Boltzmann machine (w. CD-n training)
        - 2D deconvolution (w. padding and stride)
        - 2D convolution (w. padding, dilation, and stride)
        - 1D convolution (w. padding, dilation, stride, and causality)
    * Modules
        - Bidirectional LSTM
        - ResNet-style residual blocks (identity and convolution)
        - WaveNet-style residual blocks with dilated causal convolutions
        - Transformer-style multi-headed scaled dot product attention
    * Regularizers
        - Dropout
    * Normalization
        - Batch normalization (spatial and temporal)
        - Layer normalization (spatial and temporal)
    * Optimizers
        - SGD w/ momentum
        - AdaGrad
        - RMSProp
        - Adam
    * Learning Rate Schedulers
        - Constant
        - Exponential
        - Noam/Transformer
        - Dlib scheduler
    * Weight Initializers
        - Glorot/Xavier uniform and normal
        - He/Kaiming uniform and normal
        - Standard and truncated normal
    * Losses
        - Cross entropy
        - Squared error
        - Bernoulli VAE loss
        - Wasserstein loss with gradient penalty
        - Noise contrastive estimation loss
    * Activations
        - ReLU
        - Tanh
        - Affine
        - Sigmoid
        - Leaky ReLU
        - ELU
        - SELU
        - GELU
        - Exponential
        - Hard Sigmoid
        - Softplus
    * Models
        - Bernoulli variational autoencoder
        - Wasserstein GAN with gradient penalty
        - word2vec encoder with skip-gram and CBOW architectures
    * Utilities
        - `col2im` (MATLAB port)
        - `im2col` (MATLAB port)
        - `conv1D`
        - `conv2D`
        - `deconv2D`
        - `minibatch`

5. **Tree-based models**
    - Decision trees (CART)
    - [Bagging] Random forests
    - [Boosting] Gradient-boosted decision trees

6. **Linear models**
    - Ridge regression
    - Logistic regression
    - Ordinary least squares
    - Weighted linear regression
    - Generalized linear model (log, logit, and identity link)
    - Gaussian naive Bayes classifier
    - Bayesian linear regression w/ conjugate priors
        - Unknown mean, known variance (Gaussian prior)
        - Unknown mean, unknown variance (Normal-Gamma / Normal-Inverse-Wishart prior)

7. **n-Gram sequence models**
    - Maximum likelihood scores
    - Additive/Lidstone smoothing
    - Simple Good-Turing smoothing

8. **Multi-armed bandit models**
    - UCB1
    - LinUCB
    - Epsilon-greedy
    - Thompson sampling w/ conjugate priors
        - Beta-Bernoulli sampler

8. **Reinforcement learning models**
    - Cross-entropy method agent
    - First visit on-policy Monte Carlo agent
    - Weighted incremental importance sampling Monte Carlo agent
    - Expected SARSA agent
    - TD-0 Q-learning agent
    - Dyna-Q / Dyna-Q+ with prioritized sweeping

9. **Nonparametric models**
    - Nadaraya-Watson kernel regression
    - k-Nearest neighbors classification and regression
    - Gaussian process regression

10. **Matrix factorization**
    - Regularized alternating least-squares
    - Non-negative matrix factorization

11. **Preprocessing**
    - Discrete Fourier transform (1D signals)
    - Discrete cosine transform (type-II) (1D signals)
    - Bilinear interpolation (2D signals)
    - Nearest neighbor interpolation (1D and 2D signals)
    - Autocorrelation (1D signals)
    - Signal windowing
    - Text tokenization
    - Feature hashing
    - Feature standardization
    - One-hot encoding / decoding
    - Huffman coding / decoding
    - Byte pair encoding / decoding
    - Term frequency-inverse document frequency (TF-IDF) encoding
    - MFCC encoding

12. **Utilities**
    - Similarity kernels
    - Distance metrics
    - Priority queue
    - Ball tree
    - Discrete sampler
    - Graph processing and generators
</details>

## Contributing

Am I missing your favorite model? Is there something that could be cleaner /
less confusing? Did I mess something up? Submit a PR! The only requirement is
that your models are written with just the [Python standard
library](https://docs.python.org/3/library/) and [NumPy](https://www.numpy.org/). The
[SciPy library](https://scipy.github.io/devdocs/) is also permitted under special
circumstances ;)

See full contributing guidelines [here](./CONTRIBUTING.md).


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" are not real build products; never treat them as
# up-to-date files.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/README.md
================================================
To build the documentation locally, [install sphinx](http://www.sphinx-doc.org/en/master/usage/installation.html), cd into the `docs` directory, and run `make html`. Local files will be generated in the `docs/_build/html` directory.


================================================
FILE: docs/conf.py
================================================
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
import inspect

sys.path.insert(0, os.path.abspath(".."))


# Base URL of the GitHub repository; linkcode_resolve() appends
# "/blob/master/<file>#L<start>-L<end>" to it to build source links.
gh_url = "https://github.com/ddbourgin/numpy-ml"

# -- Project information -----------------------------------------------------

project = "numpy-ml"
copyright = "2022, David Bourgin"
author = "David Bourgin"

# The short X.Y version
version = "0.1"
# The full version, including alpha/beta/rc tags
release = "0.1.0"


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.doctest",
    "sphinx.ext.intersphinx",
    "sphinx.ext.todo",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.ifconfig",
    "sphinx.ext.githubpages",
    "sphinx.ext.napoleon",
    # Trailing comma is deliberate: without it, re-enabling the entry below
    # would silently concatenate the two adjacent string literals.
    "sphinx.ext.linkcode",
    #  "numpydoc",
]

# to avoid memory errors in the read-the-docs build process
autodoc_mock_imports = ["tensorflow", "torch", "gym"]

# Try to link to source code on GitHub
def linkcode_resolve(domain, info):
    """
    Resolve a documented Python object to a GitHub URL for its source code.

    This is the hook called by ``sphinx.ext.linkcode``. Resolution is
    best-effort: whenever the object or its source cannot be located the
    function returns ``None`` instead of raising, so that a single
    unresolvable object cannot abort the documentation build.

    Parameters
    ----------
    domain : str
        The Sphinx domain of the object. Only ``"py"`` is handled.
    info : dict
        Object description. The ``"module"`` and ``"fullname"`` entries are
        used to locate the object.

    Returns
    -------
    url : str or None
        A link of the form ``<gh_url>/blob/master/<file>#L<start>-L<end>``,
        or ``None`` if no link can be produced.
    """
    if domain != "py":
        return None

    module = info.get("module", None)
    fullname = info.get("fullname", None)

    if not module or not fullname:
        return None

    # Only modules that have already been imported can be inspected.
    obj = sys.modules.get(module, None)
    if obj is None:
        return None

    try:
        # Walk the dotted path (e.g. "Class.method") down from the module.
        for part in fullname.split("."):
            obj = getattr(obj, part)
            if isinstance(obj, property):
                # Properties carry no source themselves; use the getter.
                obj = obj.fget

        file = inspect.getsourcefile(obj)
        if file is None:
            return None

        source, line_start = inspect.getsourcelines(obj)
        file = os.path.relpath(file, start=os.path.abspath(".."))
    except Exception:
        # Missing attributes, builtins / C extensions without Python source,
        # unreadable source files, or paths on another drive: skip the link.
        return None

    # URLs always use forward slashes, whatever the local path separator is.
    file = file.replace(os.sep, "/")
    line_end = line_start + len(source) - 1
    filename = f"{file}#L{line_start}-L{line_end}"
    return f"{gh_url}/blob/master/{filename}"


# Napoleon settings
# https://sphinxcontrib-napoleon.readthedocs.io/en/latest/sphinxcontrib.napoleon.html#sphinxcontrib.napoleon.Config
napoleon_google_docstring = False
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = False
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = True
napoleon_use_param = True
napoleon_use_rtype = False
napoleon_use_keyword = True


# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"

# The master toctree document.
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# NOTE: Sphinx >= 5 no longer accepts ``None`` here (it warns about an invalid
# configuration value and falls back to "en"), so the language is set
# explicitly.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "friendly"

# Use both the class docstring and the ``__init__`` docstring for autoclass.
autoclass_content = "both"


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = "alabaster"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
html_theme_options = {
    "github_user": "ddbourgin",
    "github_repo": "numpy-ml",
    "description": "Machine learning, in NumPy",
    "github_button": True,
    "show_powered_by": False,
    "fixed_sidebar": True,
    "analytics_id": "UA-65839510-3",
    #  'logo': 'logo.png',
}

# Extra stylesheets to include in the generated pages.
html_css_files = ["css/custom.css"]

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# Here every document ("**") gets the same five sidebar templates.
html_sidebars = {
    "**": [
        "about.html",
        "navigation.html",
        "relations.html",
        "searchbox.html",
        "donate.html",
    ],
}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "numpy-mldoc"


# -- Options for LaTeX output ------------------------------------------------

# No LaTeX overrides are active; the available keys are kept below, commented
# out, for reference.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    #
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    #
    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
    #
    # Latex figure (float) alignment
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        master_doc,
        "numpy-ml.tex",
        "numpy-ml Documentation",
        "David Bourgin",
        "manual",
    ),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        master_doc,
        "numpy-ml",
        "numpy-ml Documentation",
        [author],
        1,
    ),
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "numpy-ml",
        "numpy-ml Documentation",
        author,
        "numpy-ml",
        "Machine learning, in NumPy.",
        "Miscellaneous",
    ),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
# The epub title reuses the ``project`` name defined earlier in this file.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ["search.html"]

# Autodoc setting (not epub-specific): list documented members in the order
# they appear in the source file.
autodoc_member_order = "bysource"


# -- Extension configuration -------------------------------------------------

# -- Options for intersphinx extension ---------------------------------------

# Cross-reference targets in external projects. Each entry maps a short name
# to (base URL of the hosted docs, inventory location); ``None`` means "use
# the objects.inv found at the base URL".
#
# The NumPy docs are no longer served from docs.scipy.org, so point at the
# current canonical location, and pin the Python docs to the Python 3 tree.
intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "numpy": ("https://numpy.org/doc/stable/", None),
}

# -- Options for todo extension ----------------------------------------------

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True

# -- Options for numpydoc extension ------------------------------------------
# https://numpydoc.readthedocs.io/en/latest/install.html
#
# NOTE: "numpydoc" is currently commented out of ``extensions`` above, so
# these values only take effect if that extension is re-enabled.

# Whether to produce plot:: directives for Examples sections that contain
# import matplotlib or from matplotlib import.
numpydoc_use_plots = True

# Whether to show all members of a class in the Methods and Attributes sections
# automatically. True by default.
numpydoc_show_class_members = True

# Whether to show all inherited members of a class in the Methods and
# Attributes sections automatically. If it's false, inherited members won't
# be shown. True by default.
numpydoc_show_inherited_class_members = True

# Whether to create a Sphinx table of contents for the lists of class methods
# and attributes. If a table of contents is made, Sphinx expects each entry to
# have a separate page. True by default.
numpydoc_class_members_toctree = False

# A regular expression matching citations which should be mangled to avoid
# conflicts due to duplication across the documentation. Defaults to [\w-]+.
numpydoc_citation_re = r"[\w-]+"

# Until version 0.8, parameter definitions were shown as blockquotes, rather
# than in a definition list. If your styling requires blockquotes, switch this
# config option to True. This option will be removed in version 0.10.
numpydoc_use_blockquotes = False

# Whether to format the Attributes section of a class page in the same way as
# the Parameter section. If it's False, the Attributes section will be
# formatted as the Methods section using an autosummary table. True by default.
numpydoc_attributes_as_param_list = False

# Whether to create cross-references for the parameter types in the Parameters,
# Other Parameters, Returns and Yields sections of the docstring. False by
# default.
numpydoc_xref_param_type = False

# Mappings to fully qualified paths (or correct ReST references) for the
# aliases/shortcuts used when specifying the types of parameters. The keys
# should not have any spaces. Together with the intersphinx extension, you can
# map to links in any documentation. The default is an empty dict. This
# option depends on the numpydoc_xref_param_type option being True.
numpydoc_xref_aliases = {}

# Words not to cross-reference. Most likely, these are common words used in
# parameter type descriptions that may be confused for classes of the same
# name. For example: {'type', 'optional', 'default'}. The default is an empty
# set.
numpydoc_xref_ignore = set()

# Whether to insert an edit link after docstrings.
# Deprecated upstream: edit your HTML template instead.
#
# This used to be a bare annotation (``numpydoc_edit_link: bool``), which
# never assigned the setting. It is now set explicitly to False.
numpydoc_edit_link = False


================================================
FILE: docs/index.rst
================================================
Welcome to numpy-ml
===================
`numpy-ml`_ is a growing collection of machine learning models, algorithms, and
tools written exclusively in `NumPy`_ and the Python `standard library`_.

The purpose of the project is to provide reference implementations of common
machine learning components for rapid prototyping and experimentation. With
that in mind, don't just read the docs -- read the source!

.. _numpy-ml: https://www.github.com/ddbourgin/numpy-ml
.. _NumPy: https://numpy.org/
.. _standard library: https://docs.python.org/3/library/

.. topic:: This documentation is under development!

    We're working to expand our coverage. During this time there are likely to
    be typos, bugs, and poorly-worded sections. If you encounter any of the
    above, please file an `issue`_ or submit a `pull request`_!

.. _issue: https://github.com/ddbourgin/numpy-ml/issues
.. _pull request: https://github.com/ddbourgin/numpy-ml/pulls

.. toctree::
   :maxdepth: 3
   :hidden:

   numpy_ml.hmm

   numpy_ml.gmm

   numpy_ml.lda

   numpy_ml.ngram

   numpy_ml.bandits

   numpy_ml.rl_models

   numpy_ml.nonparametric

   numpy_ml.factorization

   numpy_ml.trees

   numpy_ml.neural_nets

   numpy_ml.linear_models

   numpy_ml.preprocessing

   numpy_ml.utils

##########
Disclaimer
##########

This software is provided as-is: there are no guarantees that it fits your
purposes or that it is bug-free. Use it at your own risk!


================================================
FILE: docs/make.bat
================================================
@ECHO OFF

REM Windows entry point for building the docs: forwards the first argument
REM (the build target) to "sphinx-build -M", or prints the Sphinx help when
REM no target is given.

REM Run from the directory containing this script; undone by popd at :end
pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to the "sphinx-build" found on PATH unless SPHINXBUILD is set
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM No target given: show the help text instead
if "%1" == "" goto help

REM Probe that the sphinx-build command can be launched at all (output is
REM discarded); the check below reports the "command not found" case
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Build the requested target; SPHINXOPTS passes extra options through
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd


================================================
FILE: docs/numpy_ml.bandits.bandits.rst
================================================
Bandit Environments
===================

``Bandit``
-----------
.. autoclass:: numpy_ml.bandits.bandits.Bandit
    :members:
    :undoc-members:
    :inherited-members:


``MultinomialBandit``
-------------------------
.. autoclass:: numpy_ml.bandits.MultinomialBandit
    :members:
    :undoc-members:
    :show-inheritance:

``BernoulliBandit``
-----------------------
.. autoclass:: numpy_ml.bandits.BernoulliBandit
    :members:
    :undoc-members:
    :show-inheritance:


``GaussianBandit``
----------------------
.. autoclass:: numpy_ml.bandits.GaussianBandit
    :members:
    :undoc-members:
    :show-inheritance:

``ShortestPathBandit``
-----------------------
.. autoclass:: numpy_ml.bandits.ShortestPathBandit
    :members:
    :undoc-members:
    :show-inheritance:

``ContextualBernoulliBandit``
------------------------------
.. autoclass:: numpy_ml.bandits.ContextualBernoulliBandit
    :members:
    :undoc-members:
    :show-inheritance:

``ContextualLinearBandit``
------------------------------
.. autoclass:: numpy_ml.bandits.ContextualLinearBandit
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.bandits.policies.rst
================================================
Policies
=========

``BanditPolicyBase``
--------------------
.. autoclass:: numpy_ml.bandits.policies.BanditPolicyBase
    :members:
    :undoc-members:
    :inherited-members:

``EpsilonGreedy``
-----------------
.. autoclass:: numpy_ml.bandits.policies.EpsilonGreedy
    :members:
    :undoc-members:
    :show-inheritance:

``UCB1``
--------
.. autoclass:: numpy_ml.bandits.policies.UCB1
    :members:
    :undoc-members:
    :show-inheritance:

``ThompsonSamplingBetaBinomial``
--------------------------------
.. autoclass:: numpy_ml.bandits.policies.ThompsonSamplingBetaBinomial
    :members:
    :undoc-members:
    :show-inheritance:

``LinUCB``
--------------------------------
.. autoclass:: numpy_ml.bandits.policies.LinUCB
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.bandits.rst
================================================
Multi-armed bandits
###################

.. toctree::
   :maxdepth: 3

   numpy_ml.bandits.bandits

   numpy_ml.bandits.policies

   numpy_ml.bandits.trainer


================================================
FILE: docs/numpy_ml.bandits.trainer.rst
================================================
Trainer
=======

``BanditTrainer``
------------------
.. autoclass:: numpy_ml.bandits.trainer.BanditTrainer
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.factorization.factors.rst
================================================
``VanillaALS``
--------------
.. autoclass:: numpy_ml.factorization.VanillaALS
    :members:
    :undoc-members:

``NMF``
--------
.. autoclass:: numpy_ml.factorization.NMF
    :members:
    :undoc-members:


================================================
FILE: docs/numpy_ml.factorization.rst
================================================
Matrix factorization
####################

.. toctree::
   :maxdepth: 3

   numpy_ml.factorization.factors


================================================
FILE: docs/numpy_ml.gmm.gmm.rst
================================================
``GMM``
-------

.. autoclass:: numpy_ml.gmm.GMM
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.gmm.rst
================================================
#######################
Gaussian mixture models
#######################

A `Gaussian mixture model`_ (GMM) is a latent variable model commonly used for
unsupervised clustering.

.. figure:: img/gmm_model.png
    :scale: 30 %
    :align: center

    Graphical model for a GMM with `K` mixture components and `N` data points.

.. _`Gaussian mixture model` : https://en.wikipedia.org/wiki/Mixture_model#Gaussian_mixture_model

A GMM assumes that:

    1. The observed data are generated from a `mixture distribution`_, `P`,
       made up of `K` mixture components.

    2. Each mixture component is a multivariate Gaussian with its own mean
       :math:`\mu`, covariance matrix, :math:`\Sigma`, and mixture weight,
       :math:`\pi`.

    .. 3. To generate a new data point, we sample a mixture component in
    .. proportion to its prior probability, then draw a sample from the
    .. distribution parameterized by that component's mean and covariance.

.. _mixture distribution: https://en.wikipedia.org/wiki/Mixture_distribution

The parameters of a GMM model are:

    - :math:`\theta`, the set of parameters for each of the `K` mixture
      components. :math:`\theta = \{ \mu_1, \Sigma_1, \pi_1, \ldots, \mu_K,
      \Sigma_K, \pi_K \}`.

Under a GMM, the joint probability of a sequence of cluster assignments `Z` and an observed
dataset :math:`X = \{x_1, \ldots, x_N \}`, is:

.. math::
    p(Z, X \mid \theta) =
        \prod_{i=1}^N p(z_i, x_i \mid \theta) =
            \prod_{i=1}^N \prod_{k=1}^K
                [\mathcal{N}(x_i \mid \mu_k, \Sigma_k) \pi_k ]^{\mathbb{1}_{[z_{i} = k]}}

where

    - :math:`\theta` is the set of GMM parameters: :math:`\theta = \{ \mu_1,
      \Sigma_1, \pi_1, \ldots, \mu_K, \Sigma_K, \pi_K \}`.

    - :math:`z_i \in \{ 1, \ldots, K \}` is a latent variable reflecting the ID
      of the mixture component that generated data point `i`.

    - :math:`\mathbb{1}_{[z_i = k]}` is a binary indicator function returning
      1 if data point :math:`x_i` was sampled from mixture component :math:`k`
      and 0 otherwise.

As with other latent-variable models, we use the `expectation-maximization (EM)
algorithm`_ to learn the GMM parameters.

.. _expectation-maximization (EM) algorithm : https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm

**Models**

- :class:`~numpy_ml.gmm.GMM`

**References**

.. [1] Bilmes, J. A. (1998). "A gentle tutorial of the EM algorithm and its
   application to parameter estimation for Gaussian mixture and hidden
   Markov models" *International Computer Science Institute, 4(510)*
   https://www.inf.ed.ac.uk/teaching/courses/pmr/docs/EM.pdf


.. toctree::
   :maxdepth: 2
   :hidden:

   numpy_ml.gmm.gmm


================================================
FILE: docs/numpy_ml.hmm.MultinomialHMM.rst
================================================
``MultinomialHMM``
------------------

.. autoclass:: numpy_ml.hmm.MultinomialHMM
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.hmm.rst
================================================
####################
Hidden Markov models
####################

A `hidden Markov model`_ (HMM) is a generative model for sequences of observations.

.. _`hidden Markov model` : https://en.wikipedia.org/wiki/Hidden_Markov_model

.. figure:: img/hmm_model.png
    :scale: 25 %
    :align: center

    Graphical model for an HMM with :math:`T=4` timesteps.


An HMM assumes:

    1. The observations, `O`, are generated by a process whose states,
       :math:`S`, are *hidden* from the observer.

    2. Each hidden state is a discrete random variable.

    3. Given the hidden state at time :math:`t - 1`, the hidden state at time
       `t` is independent of all hidden states before time :math:`t - 1`.

    4. The observation :math:`O_t` is independent of all previous states and
       observations given the current hidden state, :math:`S_t`.

The parameters of an HMM model are:

    - :math:`\pi`, the prior specifying :math:`P(S_1)`.

    - :math:`\theta`, the :math:`K \times K` transition matrix specifying
      :math:`P(S_t \mid S_{t-1})`.

    - :math:`\phi`, the output model defining :math:`P(O_t \mid S_t)`. If the
      observations are discrete, this is a :math:`K \times L` emission matrix,
      where `L` is the number of unique observation symbols.

The HMM joint distribution of a sequence of states and observations is:

.. math::

    P(S_{1:T}, O_{1:T}) = P(S_1) P(O_1 \mid S_1) \prod_{t=2}^T P(S_t \mid S_{t-1})P(O_t \mid S_t)

where :math:`X_{1:T}` is shorthand for :math:`X_1, \ldots, X_T`.

As with other latent-variable models, we use the `expectation-maximization
(EM) algorithm`_ to learn the model parameters.  The HMM-optimized version of
the EM algorithm is known as the `forward-backward`_ / `Baum-Welch algorithm`_.

.. _expectation-maximization (EM) algorithm : https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm
.. _forward-backward: https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm
.. _Baum-Welch algorithm: https://en.wikipedia.org/wiki/Baum%E2%80%93Welch_algorithm

**Models**

- :class:`~numpy_ml.hmm.MultinomialHMM`

**References**

.. [1] Ghahramani, Z. (2001). "An Intro to HMMs and Bayesian networks".
       *International Journal of Pattern Recognition and AI, 15(1)*: 9-42.

.. toctree::
   :maxdepth: 2
   :hidden:

   numpy_ml.hmm.MultinomialHMM


================================================
FILE: docs/numpy_ml.lda.lda.rst
================================================
``LDA``
=======

.. autoclass:: numpy_ml.lda.LDA
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.lda.rst
================================================
###########################
Latent Dirichlet allocation
###########################

`Latent Dirichlet allocation`_ (LDA, commonly known as a topic model) is a
generative model for `bags of words`_.

.. _`Latent Dirichlet allocation` : https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation
.. _bags of words : https://en.wikipedia.org/wiki/Bag-of-words_model


.. figure:: img/lda_model_smoothed.png
    :scale: 25 %
    :align: center

    The smoothed LDA model with `T` topics, `D` documents, and :math:`N_d` words per document.

In LDA, each word in a piece of text is associated with one of `T` latent
`topics`. A `document` is an unordered collection (bag) of words. During
inference, the goal is to estimate the probability of each word token under
each topic, along with the per-document topic mixture weights, using only the
observed text.

The parameters of the LDA model are:

    - :math:`\theta`, the document-topic distribution. We use
      :math:`\theta^{(i)}` to denote the parameters of the `categorical`_
      distribution over topics associated with document :math:`i`.

    - :math:`\phi`, the topic-word distribution. We use :math:`\phi^{(j)}` to
      denote the parameters of the `categorical`_ distribution over words
      associated with topic :math:`j`.

.. _categorical : https://en.wikipedia.org/wiki/Categorical_distribution

The standard LDA model [1]_ places a `Dirichlet`_ prior on :math:`\theta`:

.. math::
    \theta^{(d)}  \sim  \text{Dir}(\alpha)

The smoothed/fully-Bayesian LDA model [2]_ adds an additional `Dirichlet`_ prior on :math:`\phi`:

.. math::
    \phi^{(j)}  \sim  \text{Dir}(\beta)

.. _Dirichlet : https://en.wikipedia.org/wiki/Dirichlet_distribution

To generate a document with the smoothed LDA model, we:

    1. Sample the parameters for the distribution over topics,
       :math:`\theta \sim \text{Dir}(\alpha)`.

    2. Sample a topic, :math:`z \sim \text{Cat}(\theta)`.

    3. If we haven't already, sample the parameters for topic `z`'s categorical
       distribution over words, :math:`\phi^{(z)} \sim \text{Dir}(\beta)`.

    4. Sample a word, :math:`w \sim \text{Cat}(\phi^{(z)})`.

    5. Repeat steps 2 through 4 until we have a bag of `N` words.

The joint distribution over words, topics, :math:`\theta`, and :math:`\phi`
under the smoothed LDA model is:

.. math::

    P(w, z, \phi, \theta \mid \alpha, \beta) = \left( \prod_{t=1}^T \text{Dir}(\phi^{(t)}; \beta) \right) \prod_{d=1}^D \text{Dir}(\theta^{(d)}; \alpha) \prod_{n=1}^{N_d} P(z_n \mid \theta^{(d)}) P(w_n \mid \phi^{(z_n)})

The parameters of the LDA model can be learned using `variational expectation
maximization`_ or Markov chain Monte Carlo (e.g., `collapsed Gibbs sampling`_).

.. _`variational expectation maximization`: https://en.wikipedia.org/wiki/Variational_Bayesian_methods
.. _`collapsed Gibbs sampling`: https://en.wikipedia.org/wiki/Gibbs_sampling#Collapsed_Gibbs_sampler

**Models**

- :class:`~numpy_ml.lda.LDA`
- :class:`~numpy_ml.lda.SmoothedLDA`

**References**

.. [1]  Blei, D., Ng, A., & Jordan, M. (2003). "Latent Dirichlet allocation". *Journal of
   Machine Learning Research*, *3*, 993–1022.
.. [2]  Griffiths, T. & Steyvers, M. (2004). "Finding scientific topics".
   *PNAS*, *101(1)*, 5228-5235.

.. toctree::
   :maxdepth: 3
   :hidden:

   numpy_ml.lda.lda
   numpy_ml.lda.smoothed_lda


================================================
FILE: docs/numpy_ml.lda.smoothed_lda.rst
================================================
``SmoothedLDA``
===============

.. autoclass:: numpy_ml.lda.SmoothedLDA
    :members:
    :undoc-members:
    :inherited-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.linear_models.lm.rst
================================================
``LinearRegression``
--------------------

.. autoclass:: numpy_ml.linear_models.LinearRegression
	:members:
	:undoc-members:
	:inherited-members:

``RidgeRegression``
-------------------

.. autoclass:: numpy_ml.linear_models.RidgeRegression
	:members:
	:undoc-members:
	:inherited-members:

``LogisticRegression``
----------------------

.. autoclass:: numpy_ml.linear_models.LogisticRegression
	:members:
	:undoc-members:
	:inherited-members:

``BayesianLinearRegressionUnknownVariance``
-------------------------------------------

.. autoclass:: numpy_ml.linear_models.BayesianLinearRegressionUnknownVariance
	:members:
	:undoc-members:
	:inherited-members:

``BayesianLinearRegressionKnownVariance``
-----------------------------------------

.. autoclass:: numpy_ml.linear_models.BayesianLinearRegressionKnownVariance
	:members:
	:undoc-members:
	:inherited-members:

``GaussianNBClassifier``
-----------------------------------------

.. autoclass:: numpy_ml.linear_models.GaussianNBClassifier
	:members:
	:undoc-members:
	:inherited-members:

``GeneralizedLinearModel``
-----------------------------------------

.. autoclass:: numpy_ml.linear_models.GeneralizedLinearModel
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.linear_models.rst
================================================
Linear models
#############

.. raw:: html

   <h2>Ordinary and Weighted Linear Least Squares</h2>

In weighted linear least-squares regression (WLS), a real-valued target
:math:`y_i`, is modeled as a linear combination of covariates
:math:`\mathbf{x}_i` and model coefficients **b**:

.. math::

    y_i = \mathbf{b}^\top \mathbf{x}_i + \epsilon_i

In the above equation, :math:`\epsilon_i \sim \mathcal{N}(0, \sigma_i^2)` is a
normally distributed error term with variance :math:`\sigma_i^2`. Ordinary
least squares (OLS) is a special case of this model where the variance is fixed
across all examples, i.e., :math:`\sigma_i = \sigma_j \ \forall i,j`. The
maximum likelihood model parameters, :math:`\hat{\mathbf{b}}_{WLS}`, are those
that minimize the weighted squared error between the model predictions and the
true values:

.. math::

    \mathcal{L} = ||\mathbf{W}^{0.5}(\mathbf{y} - \mathbf{Xb})||_2^2

where :math:`\mathbf{W}` is a diagonal matrix of the example weights. In OLS,
:math:`\mathbf{W}` is the identity matrix. The maximum likelihood estimate for
the model parameters can be computed in closed-form using the normal equations:

.. math::

    \hat{\mathbf{b}}_{WLS} =
        (\mathbf{X}^\top \mathbf{WX})^{-1} \mathbf{X}^\top \mathbf{Wy}


**Models**

- :class:`~numpy_ml.linear_models.LinearRegression`

.. raw:: html

   <h2>Ridge Regression</h2>

Ridge regression uses the same simple linear regression model but adds an
additional penalty on the `L2`-norm of the coefficients to the loss function.
This is sometimes known as Tikhonov regularization.

In particular, the ridge model is the same as the OLS model:

.. math::

    \mathbf{y} = \mathbf{Xb} + \mathbf{\epsilon}

where :math:`\epsilon \sim \mathcal{N}(\mathbf{0}, \sigma^2 \mathbf{I})`,
except now the error for the model is calculated as

.. math::

    \mathcal{L} = ||\mathbf{y} - \mathbf{Xb}||_2^2 + \alpha ||\mathbf{b}||_2^2

The ridge estimate of the model parameters **b** (i.e., the minimizer of this
penalized loss) can be computed in closed form via the adjusted normal equation:

.. math::

    \hat{\mathbf{b}}_{Ridge} =
        (\mathbf{X}^\top \mathbf{X} + \alpha \mathbf{I})^{-1} \mathbf{X}^\top \mathbf{y}

where :math:`(\mathbf{X}^\top \mathbf{X} + \alpha \mathbf{I})^{-1}
\mathbf{X}^\top` is the pseudoinverse / Moore-Penrose inverse adjusted for
the `L2` penalty on the model coefficients.

**Models**

- :class:`~numpy_ml.linear_models.RidgeRegression`

.. raw:: html

   <h2>Bayesian Linear Regression</h2>

In its general form, Bayesian linear regression extends the simple linear
regression model by introducing priors on model parameters *b* and/or the
error variance :math:`\sigma^2`.

The introduction of a prior allows us to quantify the uncertainty in our
parameter estimates for b by replacing the MLE point estimate in simple
linear regression with an entire posterior *distribution*, :math:`p(b \mid X, y,
\sigma)`, simply by applying Bayes rule:

.. math::

    p(b \mid X, y) = \frac{ p(y \mid X, b) p(b \mid \sigma) }{p(y \mid X)}

We can also quantify the uncertainty in our predictions :math:`y^*` for some new
data :math:`X^*` with the posterior predictive distribution:

.. math::

    p(y^* \mid X^*, X, Y) = \int_{b} p(y^* \mid X^*, b) p(b \mid X, y) \ \text{d}b

Depending on the choice of prior it may be impossible to compute an
analytic form for the posterior / posterior predictive distribution. In
these cases, it is common to use approximations, either via MCMC or
variational inference.

.. raw:: html

   <h4>Known variance</h4>

--------------------------------

If we happen to already know the error variance :math:`\sigma^2`, the conjugate
prior on `b` is Gaussian. A common parameterization is:

.. math::

    b | \sigma, V  \sim  \mathcal{N}(\mu, \sigma^2 V)

where :math:`\mu`, :math:`\sigma` and :math:`V` are hyperparameters. Ridge
regression is a special case of this model where :math:`\mu = 0`,
:math:`\sigma = 1` and :math:`V = I` (i.e., the prior on *b* is a zero-mean,
unit covariance Gaussian).

Due to the conjugacy of the above prior with the Gaussian likelihood, there
exists a closed-form solution for the posterior over the model
parameters:

.. math::

    A  &=  (V^{-1} + X^\top X)^{-1} \\
    \mu_b  &=  A V^{-1} \mu + A X^\top y \\
    \Sigma_b  &=  \sigma^2 A \\

The model posterior is then

.. math::

    b \mid X, y  \sim  \mathcal{N}(\mu_b, \Sigma_b)

We can also compute a closed-form solution for the posterior predictive distribution as
well:

.. math::

    y^* \mid X^*, X, Y \sim \mathcal{N}(X^* \mu_b, \ \ X^* \Sigma_b X^{* \top} + I)

where :math:`X^*` is the matrix of new data we wish to predict, and :math:`y^*`
are the predicted targets for those data.

**Models**

- :class:`~numpy_ml.linear_models.BayesianLinearRegressionKnownVariance`


.. raw:: html

   <h4>Unknown variance</h4>

--------------------------------

If *both* *b* and the error variance :math:`\sigma^2` are unknown, the
conjugate prior for the Gaussian likelihood is the Normal-Gamma
distribution (univariate likelihood) or the Normal-Inverse-Wishart
distribution (multivariate likelihood).

    **Univariate**

    .. math::

        b, \sigma^2  &\sim  \text{NG}(\mu, V, \alpha, \beta) \\
        \sigma^2  &\sim  \text{InverseGamma}(\alpha, \beta) \\
        b \mid \sigma^2  &\sim  \mathcal{N}(\mu, \sigma^2 V)

    where :math:`\alpha, \beta, V`, and :math:`\mu` are parameters of the
    prior.

    **Multivariate**

    .. math::

        b, \Sigma  &\sim  \mathcal{NIW}(\mu, \lambda, \Psi, \rho) \\
        \Sigma  &\sim  \mathcal{W}^{-1}(\Psi, \rho) \\
        b \mid \Sigma  &\sim  \mathcal{N}(\mu, \frac{1}{\lambda} \Sigma)

    where :math:`\mu, \lambda, \Psi`, and :math:`\rho` are
    parameters of the prior.


Due to the conjugacy of the above priors with the Gaussian likelihood,
there exists a closed-form solution for the posterior over the model
parameters:

.. math::

    B  &=  y - X \mu \\
    \text{shape}  &=  N + \alpha \\
    \text{scale}  &=  \frac{1}{\text{shape}} (\alpha \beta + B^\top (X V X^\top + I)^{-1} B) \\

where

.. math::

    \sigma^2 \mid X, y  &\sim  \text{InverseGamma}(\text{shape}, \text{scale}) \\
    A  &=  (V^{-1} + X^\top X)^{-1} \\
    \mu_b  &=  A V^{-1} \mu + A X^\top y \\
    \Sigma_b  &=  \sigma^2 A

The model posterior is then

.. math::

    b | X, y, \sigma^2 \sim \mathcal{N}(\mu_b, \Sigma_b)

We can also compute a closed-form solution for the posterior predictive distribution:

.. math::

    y^* \mid X^*, X, Y \sim \mathcal{N}(X^* \mu_b, \ X^* \Sigma_b X^{* \top} + I)

**Models**

- :class:`~numpy_ml.linear_models.BayesianLinearRegressionUnknownVariance`

.. raw:: html

   <h2>Naive Bayes Classifier</h2>

The naive Bayes model assumes the features of a training example
:math:`\mathbf{x}` are mutually independent given the example label :math:`y`:

.. math::

    P(\mathbf{x}_i \mid y_i) = \prod_{j=1}^M P(x_{i,j} \mid y_i)

where :math:`M` is the number of features (the dimensionality) of the
:math:`i^{th}` example :math:`\mathbf{x}_i` and :math:`y_i` is the label
associated with the :math:`i^{th}` example.

Combining this conditional independence assumption with a simple application of
Bayes' theorem gives the naive Bayes classification rule:

.. math::

    \hat{y} &= \arg \max_y P(y \mid \mathbf{x}) \\
            &= \arg \max_y  P(y) P(\mathbf{x} \mid y) \\
            &= \arg \max_y  P(y) \prod_{j=1}^M P(x_j \mid y)

The prior class probability :math:`P(y)` can be specified in advance or
estimated empirically from the training data.

**Models**

- :class:`~numpy_ml.linear_models.GaussianNBClassifier`

.. raw:: html

   <h2>Generalized Linear Model</h2>

The generalized linear model (GLM) assumes that each target/dependent variable
:math:`y_i` in target vector :math:`\mathbf{y} = (y_1, \ldots, y_n)` has been
drawn independently from a pre-specified distribution in the exponential family
with unknown mean :math:`\mu_i`. The GLM models a (one-to-one, continuous,
differentiable) function, *g*, of this mean value as a linear combination of
the model parameters :math:`\mathbf{b}` and observed covariates,
:math:`\mathbf{x}_i`:

.. math::

    g(\mathbb{E}[y_i \mid \mathbf{x}_i]) =
        g(\mu_i) = \mathbf{b}^\top \mathbf{x}_i

where *g* is known as the link function.  The choice of link function is
informed by the instance of the exponential family the target is drawn from.

**Models**

- :class:`~numpy_ml.linear_models.GeneralizedLinearModel`

.. toctree::
   :maxdepth: 2
   :hidden:

   numpy_ml.linear_models.lm


================================================
FILE: docs/numpy_ml.neural_nets.activations.rst
================================================
Activations
===========

Popular (and some not-so-popular) activation functions for use within arbitrary
neural networks.

``Affine``
-----------
.. autoclass:: numpy_ml.neural_nets.activations.Affine
    :members:
    :undoc-members:
    :inherited-members:

``ELU``
-----------
.. autoclass:: numpy_ml.neural_nets.activations.ELU
    :members:
    :undoc-members:
    :inherited-members:

``Exponential``
---------------
.. autoclass:: numpy_ml.neural_nets.activations.Exponential
    :members:
    :undoc-members:
    :inherited-members:

``HardSigmoid``
---------------
.. autoclass:: numpy_ml.neural_nets.activations.HardSigmoid
    :members:
    :undoc-members:
    :inherited-members:

``Identity``
---------------
.. autoclass:: numpy_ml.neural_nets.activations.Identity
    :members:
    :undoc-members:
    :inherited-members:

``LeakyReLU``
-------------
.. autoclass:: numpy_ml.neural_nets.activations.LeakyReLU
    :members:
    :undoc-members:
    :inherited-members:

``ReLU``
---------
.. autoclass:: numpy_ml.neural_nets.activations.ReLU
    :members:
    :undoc-members:
    :inherited-members:

``SELU``
---------
.. autoclass:: numpy_ml.neural_nets.activations.SELU
    :members:
    :undoc-members:
    :inherited-members:

``GELU``
-----------
.. autoclass:: numpy_ml.neural_nets.activations.GELU
    :members:
    :undoc-members:
    :inherited-members:

``Sigmoid``
------------
.. autoclass:: numpy_ml.neural_nets.activations.Sigmoid
    :members:
    :undoc-members:
    :inherited-members:

``SoftPlus``
------------
.. autoclass:: numpy_ml.neural_nets.activations.SoftPlus
    :members:
    :undoc-members:
    :inherited-members:

``Tanh``
---------
.. autoclass:: numpy_ml.neural_nets.activations.Tanh
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.neural_nets.initializers.rst
================================================
Initializers
=============

``ActivationInitializer``
--------------------------
.. autoclass:: numpy_ml.neural_nets.initializers.ActivationInitializer
    :members:
    :undoc-members:
    :inherited-members:

``OptimizerInitializer``
--------------------------
.. autoclass:: numpy_ml.neural_nets.initializers.OptimizerInitializer
    :members:
    :undoc-members:
    :inherited-members:

``SchedulerInitializer``
--------------------------
.. autoclass:: numpy_ml.neural_nets.initializers.SchedulerInitializer
    :members:
    :undoc-members:
    :inherited-members:

``WeightInitializer``
------------------------
.. autoclass:: numpy_ml.neural_nets.initializers.WeightInitializer
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.neural_nets.layers.rst
================================================
Layers
======

``LayerBase``
-------------
.. autoclass:: numpy_ml.neural_nets.layers.layers.LayerBase
    :members:
    :undoc-members:
    :inherited-members:

``Add``
-------
.. autoclass:: numpy_ml.neural_nets.layers.Add
    :members:
    :undoc-members:
    :show-inheritance:

``BatchNorm1D``
---------------
.. autoclass:: numpy_ml.neural_nets.layers.BatchNorm1D
    :members:
    :undoc-members:
    :show-inheritance:

``BatchNorm2D``
---------------
.. autoclass:: numpy_ml.neural_nets.layers.BatchNorm2D
    :members:
    :undoc-members:
    :show-inheritance:

``Conv1D``
----------
.. autoclass:: numpy_ml.neural_nets.layers.Conv1D
    :members:
    :undoc-members:
    :show-inheritance:

``Conv2D``
----------
.. autoclass:: numpy_ml.neural_nets.layers.Conv2D
    :members:
    :undoc-members:
    :show-inheritance:

``Deconv2D``
------------
.. autoclass:: numpy_ml.neural_nets.layers.Deconv2D
    :members:
    :undoc-members:
    :show-inheritance:

``DotProductAttention``
-----------------------
.. autoclass:: numpy_ml.neural_nets.layers.DotProductAttention
    :members:
    :undoc-members:
    :show-inheritance:

``Embedding``
-------------
.. autoclass:: numpy_ml.neural_nets.layers.Embedding
    :members:
    :undoc-members:
    :show-inheritance:

``Flatten``
-----------
.. autoclass:: numpy_ml.neural_nets.layers.Flatten
    :members:
    :undoc-members:
    :show-inheritance:

``FullyConnected``
------------------
.. autoclass:: numpy_ml.neural_nets.layers.FullyConnected
    :members:
    :undoc-members:
    :show-inheritance:

``LSTM``
--------
.. autoclass:: numpy_ml.neural_nets.layers.LSTM
    :members:
    :undoc-members:
    :show-inheritance:

``LSTMCell``
------------
.. autoclass:: numpy_ml.neural_nets.layers.LSTMCell
    :members:
    :undoc-members:
    :show-inheritance:

``LayerNorm1D``
---------------
.. autoclass:: numpy_ml.neural_nets.layers.LayerNorm1D
    :members:
    :undoc-members:
    :show-inheritance:

``LayerNorm2D``
---------------
.. autoclass:: numpy_ml.neural_nets.layers.LayerNorm2D
    :members:
    :undoc-members:
    :show-inheritance:

``Multiply``
------------
.. autoclass:: numpy_ml.neural_nets.layers.Multiply
    :members:
    :undoc-members:
    :show-inheritance:

``Pool2D``
------------
.. autoclass:: numpy_ml.neural_nets.layers.Pool2D
    :members:
    :undoc-members:
    :show-inheritance:

``RNN``
-------
.. autoclass:: numpy_ml.neural_nets.layers.RNN
    :members:
    :undoc-members:
    :show-inheritance:

``RNNCell``
-----------
.. autoclass:: numpy_ml.neural_nets.layers.RNNCell
    :members:
    :undoc-members:
    :show-inheritance:

``RBM``
-------------------------------
.. autoclass:: numpy_ml.neural_nets.layers.RBM
    :members:
    :undoc-members:
    :show-inheritance:

``Softmax``
-----------
.. autoclass:: numpy_ml.neural_nets.layers.Softmax
    :members:
    :undoc-members:
    :show-inheritance:

``SparseEvolution``
-------------------
.. autoclass:: numpy_ml.neural_nets.layers.SparseEvolution
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.neural_nets.losses.rst
================================================
Loss functions
==============

``CrossEntropy``
----------------
.. autoclass:: numpy_ml.neural_nets.losses.CrossEntropy
    :members:
    :undoc-members:
    :inherited-members:

``SquaredError``
----------------
.. autoclass:: numpy_ml.neural_nets.losses.SquaredError
    :members:
    :undoc-members:
    :inherited-members:

``NCELoss``
-----------
.. autoclass:: numpy_ml.neural_nets.losses.NCELoss
    :members:
    :undoc-members:
    :inherited-members:

``VAELoss``
-----------
.. autoclass:: numpy_ml.neural_nets.losses.VAELoss
    :members:
    :undoc-members:
    :inherited-members:

``WGAN_GPLoss``
---------------
.. autoclass:: numpy_ml.neural_nets.losses.WGAN_GPLoss
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.neural_nets.models.rst
================================================
Full networks
==============

``WGAN_GP``
-----------
.. autoclass:: numpy_ml.neural_nets.models.WGAN_GP
    :members:
    :undoc-members:
    :inherited-members:

``BernoulliVAE``
----------------
.. autoclass:: numpy_ml.neural_nets.models.BernoulliVAE
    :members:
    :undoc-members:
    :inherited-members:

``Word2Vec``
------------
.. autoclass:: numpy_ml.neural_nets.models.Word2Vec
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.neural_nets.modules.rst
================================================
Modules
========

``BidirectionalLSTM``
---------------------
.. autoclass:: numpy_ml.neural_nets.modules.BidirectionalLSTM
    :members:
    :undoc-members:

``MultiHeadedAttentionModule``
------------------------------
.. autoclass:: numpy_ml.neural_nets.modules.MultiHeadedAttentionModule
    :members:
    :undoc-members:

``SkipConnectionConvModule``
------------------------------
.. autoclass:: numpy_ml.neural_nets.modules.SkipConnectionConvModule
    :members:
    :undoc-members:

``SkipConnectionIdentityModule``
--------------------------------
.. autoclass:: numpy_ml.neural_nets.modules.SkipConnectionIdentityModule
    :members:
    :undoc-members:

``WavenetResidualModule``
------------------------------
.. autoclass:: numpy_ml.neural_nets.modules.WavenetResidualModule
    :members:
    :undoc-members:


================================================
FILE: docs/numpy_ml.neural_nets.optimizers.rst
================================================
Optimizers
===========
Popular gradient-based strategies for optimizing parameters in neural networks.

For a discussion regarding the generalization performance of the solutions
found via different optimization strategies, see:

.. [1] Wilson et al. (2017) "The marginal value of adaptive gradient methods in machine
   learning", *Proceedings of the 31st Conference on Neural Information Processing Systems*
   https://arxiv.org/pdf/1705.08292.pdf

``OptimizerBase``
-----------------
.. autoclass:: numpy_ml.neural_nets.optimizers.optimizers.OptimizerBase
    :members:
    :undoc-members:
    :show-inheritance:

``SGD``
-----------
.. autoclass:: numpy_ml.neural_nets.optimizers.SGD
    :members:
    :undoc-members:
    :show-inheritance:

``AdaGrad``
-----------
.. autoclass:: numpy_ml.neural_nets.optimizers.AdaGrad
    :members:
    :undoc-members:
    :show-inheritance:

``Adam``
-----------
.. autoclass:: numpy_ml.neural_nets.optimizers.Adam
    :members:
    :undoc-members:
    :show-inheritance:

``RMSProp``
-----------
.. autoclass:: numpy_ml.neural_nets.optimizers.RMSProp
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.neural_nets.rst
================================================
Neural networks
###############
The neural network module includes common building blocks for implementing
modern `deep learning`_ models.

.. _`deep learning`: https://en.wikipedia.org/wiki/Deep_learning

.. raw:: html

   <h2>Layers</h2>

Most modern neural networks can be represented as a `composition`_ of
many small, parametric functions. The functions in this composition are
commonly referred to as the "layers" of the network. As an example, the
multilayer perceptron (MLP) below computes the function :math:`(f
\circ g \circ h)`, where `f`, `g`, and `h` are the individual network layers.

.. figure:: img/mlp_model.png
    :scale: 40 %
    :align: center

    A multilayer perceptron with three layers labeled `f`, `g`, and `h`.

Many neural network layers are parametric: they express different
transformations depending on the setting of their weights (coefficients),
biases (intercepts), and/or other tunable values. These parameters are adjusted
during training to improve the performance of the network on a particular
metric.

The :doc:`numpy_ml.neural_nets.layers` module contains a number of common
transformations that can be composed to create larger networks.

.. _`composition`: https://en.wikipedia.org/wiki/Function_composition

**Layers**

+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.Add`         | - :class:`~numpy_ml.neural_nets.layers.Deconv2D`            | - :class:`~numpy_ml.neural_nets.layers.LSTM`            |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.BatchNorm1D` | - :class:`~numpy_ml.neural_nets.layers.DotProductAttention` | - :class:`~numpy_ml.neural_nets.layers.LSTMCell`        |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.BatchNorm2D` | - :class:`~numpy_ml.neural_nets.layers.Embedding`           | - :class:`~numpy_ml.neural_nets.layers.LayerNorm1D`     |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.Conv1D`      | - :class:`~numpy_ml.neural_nets.layers.Flatten`             | - :class:`~numpy_ml.neural_nets.layers.LayerNorm2D`     |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.Conv2D`      | - :class:`~numpy_ml.neural_nets.layers.FullyConnected`      | - :class:`~numpy_ml.neural_nets.layers.Multiply`        |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.Pool2D`      | - :class:`~numpy_ml.neural_nets.layers.RNN`                 | - :class:`~numpy_ml.neural_nets.layers.RNNCell`         |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.layers.RBM`         | - :class:`~numpy_ml.neural_nets.layers.Softmax`             | - :class:`~numpy_ml.neural_nets.layers.SparseEvolution` |
+-----------------------------------------------------+-------------------------------------------------------------+---------------------------------------------------------+

.. raw:: html

   <h2>Activations</h2>

Each unit in a neural network sums its input and passes it through an
`activation function`_ before sending it on to its outgoing weights. Activation
functions in most modern networks are real-valued, non-linear functions that
are inexpensive to compute and easily differentiable.

The :doc:`Activations <numpy_ml.neural_nets.activations>` module contains a
number of common activation functions.

.. _`activation function`: https://en.wikipedia.org/wiki/Activation_function

**Activations**

+----------------------------------------------------------+--------------------------------------------------------+-------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.activations.Affine`      | - :class:`~numpy_ml.neural_nets.activations.Identity`  | - :class:`~numpy_ml.neural_nets.activations.Sigmoid`  |
+----------------------------------------------------------+--------------------------------------------------------+-------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.activations.ELU`         | - :class:`~numpy_ml.neural_nets.activations.LeakyReLU` | - :class:`~numpy_ml.neural_nets.activations.SoftPlus` |
| - :class:`~numpy_ml.neural_nets.activations.Exponential` | - :class:`~numpy_ml.neural_nets.activations.ReLU`      | - :class:`~numpy_ml.neural_nets.activations.Tanh`     |
| - :class:`~numpy_ml.neural_nets.activations.HardSigmoid` | - :class:`~numpy_ml.neural_nets.activations.SELU`      |                                                       |
+----------------------------------------------------------+--------------------------------------------------------+-------------------------------------------------------+

.. raw:: html

   <h2>Losses</h2>

Training a neural network involves searching for layer parameters that optimize
the network's performance on a given task. `Loss functions`_ are the
quantitative metric we use to measure how well the network is performing. Loss
functions are typically scalar-valued functions of a network's output on some
training data.

The :doc:`Losses <numpy_ml.neural_nets.losses>` module contains loss functions
for a number of common tasks.

.. _`Loss functions`: https://en.wikipedia.org/wiki/Loss_function

**Losses**

+------------------------------------------------------+-------------------------------------------------+-----------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.losses.CrossEntropy` | - :class:`~numpy_ml.neural_nets.losses.NCELoss` | - :class:`~numpy_ml.neural_nets.losses.WGAN_GPLoss` |
+------------------------------------------------------+-------------------------------------------------+-----------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.losses.SquaredError` | - :class:`~numpy_ml.neural_nets.losses.VAELoss` |                                                     |
+------------------------------------------------------+-------------------------------------------------+-----------------------------------------------------+

.. raw:: html

   <h2>Optimizers</h2>

The :doc:`Optimizers <numpy_ml.neural_nets.optimizers>` module contains several
popular gradient-based strategies for adjusting the parameters of a neural
network to optimize a loss function. The proper choice of optimization strategy
can help reduce training time / speed up convergence, though see [1]_ for a
discussion on the generalization performance of the solutions identified via
different strategies.

.. [1] Wilson, A. C., Roelofs, R., Stern, M., Srebro, N., & Recht, B. (2017)
   "The marginal value of adaptive gradient methods in machine learning",
   *Proceedings of the 31st Conference on Neural Information Processing
   Systems*. https://arxiv.org/pdf/1705.08292.pdf

**Optimizers**

+-------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+-----------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.optimizers.SGD` | - :class:`~numpy_ml.neural_nets.optimizers.AdaGrad` | - :class:`~numpy_ml.neural_nets.optimizers.Adam` | - :class:`~numpy_ml.neural_nets.optimizers.RMSProp` |
+-------------------------------------------------+-----------------------------------------------------+--------------------------------------------------+-----------------------------------------------------+

.. raw:: html

   <h2>Learning Rate Schedulers</h2>

It is common to reduce an optimizer's learning rate(s) over the course of
training in order to eke out additional performance improvements. The
:doc:`Schedulers <numpy_ml.neural_nets.schedulers>` module contains several
strategies for automatically adjusting the learning rate as a function of the
number of elapsed training steps.

**Schedulers**

+---------------------------------------------------------------+------------------------------------------------------------------+-----------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.schedulers.ConstantScheduler` | - :class:`~numpy_ml.neural_nets.schedulers.ExponentialScheduler` | - :class:`~numpy_ml.neural_nets.schedulers.KingScheduler` |
+---------------------------------------------------------------+------------------------------------------------------------------+-----------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.schedulers.NoamScheduler`     |                                                                  |                                                           |
+---------------------------------------------------------------+------------------------------------------------------------------+-----------------------------------------------------------+

.. raw:: html

   <h2>Wrappers</h2>

The :doc:`Wrappers <numpy_ml.neural_nets.wrappers>` module contains classes
that wrap or otherwise modify the behavior of a network layer.

**Wrappers**

- :class:`~numpy_ml.neural_nets.wrappers.Dropout`

.. raw:: html

   <h2>Modules</h2>

Many deep networks consist of stacks of repeated modules. These modules, often
consisting of several layers / layer operations, can themselves be abstracted
in order to simplify the building of more complex networks. The :doc:`Modules
<numpy_ml.neural_nets.modules>` module contains a few common architectural
patterns that appear across a number of popular deep learning approaches.

**Modules**

+-----------------------------------------------------------------------+---------------------------------------------------------------------+-------------------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.modules.BidirectionalLSTM`            | - :class:`~numpy_ml.neural_nets.modules.MultiHeadedAttentionModule` | - :class:`~numpy_ml.neural_nets.modules.SkipConnectionConvModule` |
+-----------------------------------------------------------------------+---------------------------------------------------------------------+-------------------------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.modules.SkipConnectionIdentityModule` | - :class:`~numpy_ml.neural_nets.modules.WavenetResidualModule`      |                                                                   |
+-----------------------------------------------------------------------+---------------------------------------------------------------------+-------------------------------------------------------------------+


.. raw:: html

   <h2>Full Networks</h2>

The :doc:`Models <numpy_ml.neural_nets.models>` module contains implementations
of several well-known neural networks from recent papers.

**Full Networks**

- :class:`~numpy_ml.neural_nets.models.WGAN_GP`
- :class:`~numpy_ml.neural_nets.models.BernoulliVAE`
- :class:`~numpy_ml.neural_nets.models.Word2Vec`


.. raw:: html

   <h2>Utilities</h2>

The :doc:`Utilities <numpy_ml.neural_nets.utils>` module contains a number of
helper functions for dealing with weight initialization, convolution
arithmetic, padding, and minibatching.

**Utilities**

+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.utils.minibatch`        | - :class:`~numpy_ml.neural_nets.utils.pad1D`            | - :class:`~numpy_ml.neural_nets.utils.calc_fan`           | - :class:`~numpy_ml.neural_nets.utils.col2im`    |
+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.utils.calc_pad_dims_2D` | - :class:`~numpy_ml.neural_nets.utils.pad2D`            | - :class:`~numpy_ml.neural_nets.utils.calc_conv_out_dims` | - :class:`~numpy_ml.neural_nets.utils.conv2D`    |
+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.utils.calc_pad_dims_1D` | - :class:`~numpy_ml.neural_nets.utils.dilate`           | - :class:`~numpy_ml.neural_nets.utils.im2col`             | - :class:`~numpy_ml.neural_nets.utils.conv1D`    |
+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.utils.deconv2D_naive`   | - :class:`~numpy_ml.neural_nets.utils.conv2D_naive`     | - :class:`~numpy_ml.neural_nets.utils.he_uniform`         | - :class:`~numpy_ml.neural_nets.utils.he_normal` |
+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+
| - :class:`~numpy_ml.neural_nets.utils.glorot_uniform`   | - :class:`~numpy_ml.neural_nets.utils.truncated_normal` |                                                           |                                                  |
+---------------------------------------------------------+---------------------------------------------------------+-----------------------------------------------------------+--------------------------------------------------+


.. toctree::
   :maxdepth: 3
   :hidden:

   numpy_ml.neural_nets.layers

   numpy_ml.neural_nets.activations

   numpy_ml.neural_nets.losses

   numpy_ml.neural_nets.optimizers

   numpy_ml.neural_nets.schedulers

   numpy_ml.neural_nets.wrappers

   numpy_ml.neural_nets.modules

   numpy_ml.neural_nets.models

   numpy_ml.neural_nets.utils


================================================
FILE: docs/numpy_ml.neural_nets.schedulers.rst
================================================
Learning rate schedulers
=========================

``ConstantScheduler``
---------------------
.. autoclass:: numpy_ml.neural_nets.schedulers.ConstantScheduler
    :members:
    :undoc-members:
    :inherited-members:

``ExponentialScheduler``
------------------------
.. autoclass:: numpy_ml.neural_nets.schedulers.ExponentialScheduler
    :members:
    :undoc-members:
    :inherited-members:

``KingScheduler``
------------------------
.. autoclass:: numpy_ml.neural_nets.schedulers.KingScheduler
    :members:
    :undoc-members:
    :inherited-members:

``NoamScheduler``
------------------------
.. autoclass:: numpy_ml.neural_nets.schedulers.NoamScheduler
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.neural_nets.utils.rst
================================================
Utilities
==========

``minibatch``
-------------
.. autofunction:: numpy_ml.neural_nets.utils.minibatch

``calc_pad_dims_2D``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.calc_pad_dims_2D

``calc_pad_dims_1D``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.calc_pad_dims_1D

``pad1D``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.pad1D

``pad2D``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.pad2D

``dilate``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.dilate

``calc_fan``
--------------------
.. autofunction:: numpy_ml.neural_nets.utils.calc_fan

``calc_conv_out_dims``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.calc_conv_out_dims

``im2col``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.im2col

``col2im``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.col2im

``conv2D``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.conv2D

``conv1D``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.conv1D

``deconv2D_naive``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.deconv2D_naive

``conv2D_naive``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.conv2D_naive

``he_uniform``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.he_uniform

``he_normal``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.he_normal

``glorot_uniform``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.glorot_uniform

``glorot_normal``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.glorot_normal

``truncated_normal``
-----------------------
.. autofunction:: numpy_ml.neural_nets.utils.truncated_normal


================================================
FILE: docs/numpy_ml.neural_nets.wrappers.rst
================================================
Wrappers
=========

``WrapperBase``
---------------
.. autoclass:: numpy_ml.neural_nets.wrappers.wrappers.WrapperBase
    :members:
    :undoc-members:
    :inherited-members:

``Dropout``
-----------
.. autoclass:: numpy_ml.neural_nets.wrappers.Dropout
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.ngram.additive.rst
================================================
``AdditiveNGram``
-----------------

.. autoclass:: numpy_ml.ngram.AdditiveNGram
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.ngram.goodturing.rst
================================================
``GoodTuringNGram``
-------------------

.. autoclass:: numpy_ml.ngram.GoodTuringNGram
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.ngram.mle.rst
================================================
``MLENGram``
------------

.. autoclass:: numpy_ml.ngram.MLENGram
        :members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.ngram.rst
================================================
#######################
N-gram smoothing models
#######################

When dealing with `n-gram`_ models, smoothing refers to the practice of
adjusting empirical probability estimates to account for insufficient data.

In the descriptions below, we use the notation :math:`w^{j}_{i}`, :math:`i < j`, to
denote the `(j - i + 1)`-gram :math:`(w_{i}, w_{i+1}, \ldots, w_{j})`.

.. raw:: html

   <h3>Laplace Smoothing</h3>

`Laplace smoothing`_ is the assumption that each `n`-gram in a corpus occurs
exactly one more time than it actually does.

.. math::

    p(w_i \mid w^{i-1}_{i-n+1}) = \frac{1 + c(w^{i}_{i-n+1})}{|V| + \sum_{w_i} c(w^{i}_{i-n+1})}

where :math:`c(a)` denotes the empirical count of the `n`-gram :math:`a` in the
corpus, and :math:`|V|` corresponds to the number of unique `n`-grams in the
corpus.

.. _`Laplace smoothing`: https://en.wikipedia.org/wiki/Additive_smoothing

**Models**

- :class:`~numpy_ml.ngram.AdditiveNGram`

.. raw:: html

   <h3>Additive/Lidstone Smoothing</h3>

`Additive/Lidstone smoothing`_ is a generalization of Laplace smoothing, where we
assume that each `n`-gram in a corpus occurs `k` more times than it actually
does (where `k` can be any non-negative value, but typically ranges between `[0, 1]`):

.. math::

    p(w_i \mid w^{i-1}_{i-n+1}) = \frac{k + c(w^{i}_{i-n+1})}{k |V| + \sum_{w_i} c(w^{i}_{i-n+1})}

where :math:`c(a)` denotes the empirical count of the `n`-gram :math:`a` in the
corpus, and :math:`|V|` corresponds to the number of unique `n`-grams in the
corpus.

.. _`Additive/Lidstone smoothing`: https://en.wikipedia.org/wiki/Additive_smoothing

**Models**

- :class:`~numpy_ml.ngram.AdditiveNGram`


.. raw:: html

   <h3>Good-Turing Smoothing</h3>

`Good-Turing smoothing`_ is a more sophisticated technique which takes into
account the identity of the particular `n`-gram when deciding the amount of
smoothing to apply. It proceeds by allocating a portion of the probability
space occupied by `n`-grams which occur with count `r+1` and dividing it among
the `n`-grams which occur with rate `r`.

.. math::
    r^*  =  (r + 1) \frac{g(r + 1)}{g(r)} \\
    p(w^{i}_{i-n+1} \mid c(w^{i}_{i-n+1}) = r)  =  \frac{r^*}{N}

where :math:`r^*` is the adjusted count for an `n`-gram which occurs `r` times,
`g(x)` is the number of `n`-grams in the corpus which occur `x` times, and `N`
is the total number of `n`-grams in the corpus.

.. _n-gram: https://en.wikipedia.org/wiki/N-gram
.. _`Good-Turing smoothing`: https://en.wikipedia.org/wiki/Good%E2%80%93Turing_frequency_estimation

**Models**

- :class:`~numpy_ml.ngram.GoodTuringNGram`

**References**

.. [1]  Chen & Goodman (1998). "An empirical study of smoothing techniques
   for language modeling".  *Harvard Computer Science Group Technical Report
   TR-10-98*.
.. [2] Gale & Sampson (1995). "Good-Turing frequency estimation without
   tears". *Journal of Quantitative Linguistics*, 2(3), 217-237.

.. toctree::
   :maxdepth: 3
   :hidden:

   numpy_ml.ngram.mle

   numpy_ml.ngram.additive

   numpy_ml.ngram.goodturing


================================================
FILE: docs/numpy_ml.nonparametric.gp.rst
================================================
``GPRegression``
#################

.. autoclass:: numpy_ml.nonparametric.GPRegression
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.nonparametric.kernel_regression.rst
================================================
``KernelRegression``
#####################

.. autoclass:: numpy_ml.nonparametric.KernelRegression
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.nonparametric.knn.rst
================================================
``KNN``
#######

.. autoclass:: numpy_ml.nonparametric.KNN
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.nonparametric.rst
================================================
Nonparametric models
####################

.. raw:: html

   <h2>K-Nearest Neighbors</h2>

The `k-nearest neighbors`_ (KNN) model is a nonparametric supervised learning
approach that can be applied to classification or regression problems. In a
classification context, the KNN model assigns a class label for a new datapoint
by taking a majority vote amongst the labels for the `k` closest points
("neighbors") in the training data. Similarly, in a regression context, the KNN
model predicts the target value associated with a new datapoint by taking the
average of the targets associated with the `k` closest points in the training
data.

.. _`k-nearest neighbors`: https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm

**Models**

- :class:`~numpy_ml.nonparametric.KNN`

.. raw:: html

   <h2>Gaussian Process Regression</h2>

A `Gaussian process`_ defines a prior distribution over functions mapping
:math:`X \rightarrow \mathbb{R}`, where `X` can be any finite (or
infinite!)-dimensional set.

Let :math:`f(x_k)` be the random variable corresponding to
the value of a function `f` at a point :math:`x_k \in X`. Define a random
variable :math:`z = [f(x_1), \ldots, f(x_N)]` for any finite set of points
:math:`\{x_1, \ldots, x_N\} \subset X`. If `f` is distributed according to a
Gaussian Process, it is the case that

.. math::

    z \sim \mathcal{N}(\mu, K)

for

.. math::

    \mu  &=  [\text{mean}(x_1), \ldots, \text{mean}(x_N)] \\
    K_{ij}  &=  \text{kernel}(x_i, x_j)

where mean is the mean function (in Gaussian process regression it is common
to define mean(`x`) = 0), and `kernel` is a :doc:`kernel
<numpy_ml.utils.kernels>` / covariance function that determines the general
shape of the GP prior over functions, `p(f)`.

In `Gaussian process regression`_ (AKA simple Kriging [2]_ [3]_), a Gaussian
process is used as a prior on functions and is combined with the Gaussian
likelihood from the linear model via Bayes' rule to compute a posterior over
functions `f`:

.. math::

    y \mid X, f  &\sim  \mathcal{N}( [f(x_1), \ldots, f(x_n)], \alpha I ) \\
    f \mid X     &\sim  \text{GP}(0, K)

Due to the conjugacy of the Gaussian Process prior with the regression model's
Gaussian likelihood, the posterior will also be Gaussian and can be computed in
closed form.

.. _`Gaussian process`: https://en.wikipedia.org/wiki/Gaussian_process
.. _`Gaussian process regression`: https://en.wikipedia.org/wiki/Kriging

**Models**

- :class:`~numpy_ml.nonparametric.GPRegression`

**References**

.. [1] Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian Processes for
   Machine Learning. MIT Press, Cambridge, MA.
.. [2] Krige, D. G., (1951). "A statistical approach to some mine valuations and
   allied problems at the Witwatersrand", *Master's thesis of the University of
   Witwatersrand*.
.. [3] Matheron, G., (1963). "Principles of geostatistics", *Economic Geology, 58*, 1246-1266.

.. raw:: html

   <h2>Kernel Regression</h2>

Kernel regression is another nonparametric approach to nonlinear regression.
Like the Gaussian Process regression approach (or, more generally, all
regression models), kernel regression attempts to learn a function `f` which
captures the conditional expectation of some targets **y** given the data
**X**, under the assumption that

.. math::
    y_i = f(x_i) + \epsilon_i \ \ \ \ \text{where } \mathbb{E}[\epsilon | \mathbf{x}] = \mathbb{E}[\epsilon] = 0

Unlike the Gaussian Process regression approach, however, kernel regression
does not place a prior over `f`. Instead, it models :math:`f = \mathbb{E}[y |
X] = \int_y \frac{p(X, y)}{p(X)} y \ \text{d}y` using a :doc:`kernel function
<numpy_ml.utils.kernels>`, `k`, to estimate the smoothed data probabilities.
For example, the :class:`Nadaraya-Watson <numpy_ml.nonparametric.KernelRegression>`
estimator [4]_ [5]_ uses the following probability estimates:

.. math::
    \hat{p}(X)  &=  \prod_{i=1}^N \hat{p}(x_i) = \prod_{i=1}^N \sum_{j=1}^N \frac{k(x_i - x_j)}{N} \\
    \hat{p}(X, y)  &=  \prod_{i=1}^N \hat{p}(x_i, y_i) = \prod_{i=1}^N \sum_{j=1}^N \frac{k(x_i - x_j) k(y_i - y_j)}{N}


**Models**

- :class:`~numpy_ml.nonparametric.KernelRegression`

**References**

.. [4] Nadaraya, E. A. (1964). "On estimating regression". *Theory of
   Probability and Its Applications, 9 (1)*, 141-2.
.. [5] Watson, G. S. (1964). "Smooth regression analysis". *Sankhyā: The Indian
   Journal of Statistics, Series A. 26 (4)*, 359–372.

.. raw:: html

   <h2>See Also</h2>

The :doc:`trees <numpy_ml.trees>` module contains other classic nonparametric
approaches, including :doc:`decision trees <numpy_ml.trees.dt>`,
:doc:`random forests <numpy_ml.trees.rf>`, and :doc:`gradient
boosted decision trees <numpy_ml.trees.gbdt>`.

.. toctree::
   :maxdepth: 2
   :hidden:

   numpy_ml.nonparametric.knn
   numpy_ml.nonparametric.gp
   numpy_ml.nonparametric.kernel_regression


================================================
FILE: docs/numpy_ml.preprocessing.dsp.rst
================================================
Digital signal processing
#########################

``DCT``
-------
.. autofunction:: numpy_ml.preprocessing.dsp.DCT

``DFT``
-------
.. autofunction:: numpy_ml.preprocessing.dsp.DFT

``dft_bins``
------------
.. autofunction:: numpy_ml.preprocessing.dsp.dft_bins

``magnitude_spectrum``
----------------------
.. autofunction:: numpy_ml.preprocessing.dsp.magnitude_spectrum

``power_spectrum``
------------------
.. autofunction:: numpy_ml.preprocessing.dsp.power_spectrum

``batch_resample``
------------------
.. autofunction:: numpy_ml.preprocessing.dsp.batch_resample

``nn_interpolate_2D``
---------------------
.. autofunction:: numpy_ml.preprocessing.dsp.nn_interpolate_2D

``nn_interpolate_1D``
---------------------
.. autofunction:: numpy_ml.preprocessing.dsp.nn_interpolate_1D

``bilinear_interpolate``
-------------------------
.. autofunction:: numpy_ml.preprocessing.dsp.bilinear_interpolate

``to_frames``
-------------

.. autofunction:: numpy_ml.preprocessing.dsp.to_frames

``autocorrelate1D``
-------------------

.. autofunction:: numpy_ml.preprocessing.dsp.autocorrelate1D

``preemphasis``
---------------

.. autofunction:: numpy_ml.preprocessing.dsp.preemphasis

``cepstral_lifter``
-------------------

.. autofunction:: numpy_ml.preprocessing.dsp.cepstral_lifter

``mel_spectrogram``
-------------------

.. autofunction:: numpy_ml.preprocessing.dsp.mel_spectrogram

``mfcc``
--------

.. autofunction:: numpy_ml.preprocessing.dsp.mfcc

``mel2hz``
----------

.. autofunction:: numpy_ml.preprocessing.dsp.mel2hz

``hz2mel``
----------

.. autofunction:: numpy_ml.preprocessing.dsp.hz2mel

``mel_filterbank``
------------------

.. autofunction:: numpy_ml.preprocessing.dsp.mel_filterbank


================================================
FILE: docs/numpy_ml.preprocessing.general.rst
================================================
General
#######

``FeatureHasher``
-----------------

.. autoclass:: numpy_ml.preprocessing.general.FeatureHasher
	:members:
	:undoc-members:
	:inherited-members:

``OneHotEncoder``
-----------------

.. autoclass:: numpy_ml.preprocessing.general.OneHotEncoder
	:members:
	:undoc-members:
	:inherited-members:

``Standardizer``
----------------

.. autoclass:: numpy_ml.preprocessing.general.Standardizer
	:members:
	:undoc-members:
	:inherited-members:

``minibatch``
-------------

.. automodule:: numpy_ml.preprocessing.general
	:members: minibatch


================================================
FILE: docs/numpy_ml.preprocessing.nlp.rst
================================================
Natural language processing
###########################

``BytePairEncoder``
-------------------

.. autoclass:: numpy_ml.preprocessing.nlp.BytePairEncoder
	:members:
	:undoc-members:
	:inherited-members:

``HuffmanEncoder``
------------------

.. autoclass:: numpy_ml.preprocessing.nlp.HuffmanEncoder
	:members:
	:undoc-members:
	:inherited-members:

``TFIDFEncoder``
------------------

.. autoclass:: numpy_ml.preprocessing.nlp.TFIDFEncoder
	:members:
	:undoc-members:
	:inherited-members:

``Vocabulary``
--------------

.. autoclass:: numpy_ml.preprocessing.nlp.Vocabulary
	:members:
	:undoc-members:
	:inherited-members:

``Token``
---------

.. autoclass:: numpy_ml.preprocessing.nlp.Token
	:members:
	:undoc-members:
	:inherited-members:

``ngrams``
-----------

.. autofunction:: numpy_ml.preprocessing.nlp.ngrams

``remove_stop_words``
---------------------

.. autofunction:: numpy_ml.preprocessing.nlp.remove_stop_words

``strip_punctuation``
---------------------

.. autofunction:: numpy_ml.preprocessing.nlp.strip_punctuation

``tokenize_words``
-------------------

.. autofunction:: numpy_ml.preprocessing.nlp.tokenize_words

``tokenize_whitespace``
------------------------

.. autofunction:: numpy_ml.preprocessing.nlp.tokenize_whitespace

``tokenize_chars``
-------------------

.. autofunction:: numpy_ml.preprocessing.nlp.tokenize_chars

``tokenize_bytes_raw``
-----------------------

.. autofunction:: numpy_ml.preprocessing.nlp.tokenize_bytes_raw

``bytes_to_chars``
-----------------------

.. autofunction:: numpy_ml.preprocessing.nlp.bytes_to_chars


================================================
FILE: docs/numpy_ml.preprocessing.rst
================================================
Preprocessing
#############

.. toctree::
   :maxdepth: 3

   numpy_ml.preprocessing.general

   numpy_ml.preprocessing.dsp

   numpy_ml.preprocessing.nlp


================================================
FILE: docs/numpy_ml.rl_models.agents.rst
================================================
Agents
======

``CrossEntropyAgent``
---------------------
.. autoclass:: numpy_ml.rl_models.agents.CrossEntropyAgent
    :members:
    :undoc-members:
    :inherited-members:

``DynaAgent``
-------------
.. autoclass:: numpy_ml.rl_models.agents.DynaAgent
    :members:
    :undoc-members:
    :inherited-members:

``MonteCarloAgent``
-------------------
Monte Carlo methods are ways of solving RL problems based on averaging
sample returns for each state-action pair. Parameters are updated only at
the completion of an episode.

In on-policy learning, the agent maintains a single policy that it updates
over the course of training. In order to ensure the policy converges to a
(near-) optimal policy, the agent must maintain that the policy assigns
non-zero probability to ALL state-action pairs during training to ensure
continual exploration.

- Thus on-policy learning is a compromise--it learns action values not for the optimal policy, but for a *near*-optimal policy that still explores.

In off-policy learning, the agent maintains two separate policies:

1. **Target policy**: The policy that is learned during training and that will eventually become the optimal policy.
2. **Behavior policy**: A policy that is more exploratory and is used to generate behavior during training.

Off-policy methods are often of greater variance and are slower to
converge. On the other hand, off-policy methods are more powerful and
general than on-policy methods.

.. autoclass:: numpy_ml.rl_models.agents.MonteCarloAgent
    :members:
    :undoc-members:
    :inherited-members:

``TemporalDifferenceAgent``
---------------------------

Temporal difference methods are examples of bootstrapping in that they update
their estimate for the value of state `s` on the basis of a previous estimate.

Advantages of TD algorithms:

1. They do not require a model of the environment, its reward, or its next-state probability distributions.
2. They are implemented in an online, fully incremental fashion. This allows them to be used with infinite-horizons / when episodes take prohibitively long to finish.
3. TD algorithms learn from each transition regardless of what subsequent actions are taken.
4. In practice, TD methods have usually been found to converge faster than constant-:math:`\alpha` Monte Carlo methods on stochastic tasks.

.. autoclass:: numpy_ml.rl_models.agents.TemporalDifferenceAgent
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.rl_models.rl_utils.rst
================================================
Utilities
=========

.. automodule:: numpy_ml.rl_models.rl_utils
    :members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.rl_models.rst
================================================
Reinforcement learning
######################

.. toctree::
   :maxdepth: 3

   numpy_ml.rl_models.agents

   numpy_ml.rl_models.trainer

   numpy_ml.rl_models.rl_utils


================================================
FILE: docs/numpy_ml.rl_models.trainer.rst
================================================
Training
========

``Trainer``
-----------

.. automodule:: numpy_ml.rl_models.trainer
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.trees.dt.rst
================================================
################
``DecisionTree``
################

.. autoclass:: numpy_ml.trees.DecisionTree
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.trees.gbdt.rst
================================================
``GradientBoostedDecisionTree``
###############################

.. autoclass:: numpy_ml.trees.GradientBoostedDecisionTree
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.trees.losses.rst
================================================
#########################
Losses (``trees.losses``)
#########################

.. automodule:: numpy_ml.trees.losses
	:members:
	:undoc-members:
	:inherited-members:
	:show-inheritance:


================================================
FILE: docs/numpy_ml.trees.rf.rst
================================================
``RandomForest``
################

.. autoclass:: numpy_ml.trees.RandomForest
	:members:
	:undoc-members:
	:inherited-members:


================================================
FILE: docs/numpy_ml.trees.rst
================================================
Tree-based models
#################
.. raw:: html

   <h2>Decision Trees</h2>

`Decision trees`_ [1]_ are popular nonparametric models that iteratively split a
training dataset into smaller, more homogeneous subsets. Each node in the tree
is associated with a decision rule, which dictates how to divide the data the
node inherits from its parent among each of its children. Each leaf node is
associated with at least one data point from the original training set.

.. figure:: img/decision_tree.png
    :width: 95%
    :align: center

    A binary decision tree trained on the dataset :math:`X = \{ \mathbf{x}_1,
    \ldots, \mathbf{x}_{10} \}`. Each example in the dataset is a 4-dimensional
    vector of real-valued features labeled :math:`x_1, \ldots, x_4`. Unshaded
    circles correspond to internal decision nodes, while shaded circles
    correspond to leaf nodes. Each leaf node is associated with a subset of the
    examples in `X`, selected based on the decision rules along the path from
    root to leaf.

At test time, new examples travel from the tree root to one of the leaves,
their path through the tree determined by the decision rules at each of the
nodes they visit. When a test example arrives at a leaf node, the targets for
the training examples at that leaf node are used to compute the model's
prediction.

Training decision trees corresponds to learning the set of decision rules to
partition the training data. This learning process proceeds greedily by
selecting the decision rule at each node that results in the greatest reduction
in an inhomogeneity or "impurity" metric, :math:`\mathcal{L}`. One popular
metric is the **information entropy**:

.. math::

    -\sum_j P_n(\omega_j) \log P_n(\omega_j)

where :math:`P_n(\omega_j)` is the fraction of data points at split `n` that are
associated with category :math:`\omega_j`. Another useful metric is the **Gini
impurity**:

.. math::

    \sum_{i \neq j} P_n(\omega_i) P_n(\omega_j) = 1 - \sum_{j} P_n(\omega_j)^2

For a binary tree (where each node has only two children), the reduction in
impurity after a particular split is

.. math::

    \Delta \mathcal{L} = \mathcal{L}(\text{Parent}) -
        P_{\text{left}} \mathcal{L}(\text{Left child}) -
            (1 - P_{\text{left}})\mathcal{L}(\text{Right child})

where :math:`\mathcal{L}(x)` is the impurity of the dataset at node `x`,
and :math:`P_{\text{left}}`/:math:`P_{\text{right}}` are the proportion of
examples at the current node that are partitioned into the left / right
children, respectively, by the proposed split.

.. _`Decision trees`: https://en.wikipedia.org/wiki/Decision_tree_learning

**Models**

- :class:`~numpy_ml.trees.DecisionTree`

**References**

.. [1] Breiman, L., Friedman, J. H., Olshen, R. A., and Stone, C. J. (1984).
   Classification and regression trees. Monterey, CA: Wadsworth & Brooks/Cole
   Advanced Books & Software.

.. raw:: html

   <h2>Bootstrap Aggregating</h2>

`Bootstrap aggregating`_ (bagging) methods [2]_ are an `ensembling approach`_ that
proceeds by creating `n` bootstrapped samples of a training dataset by sampling
from it with replacement. A separate learner is fit on each of the `n`
bootstrapped datasets, with the final bootstrap aggregated model prediction
corresponding to the average (or majority vote, for classifiers) across each
of the `n` learners' predictions for a given datapoint.

The `random forest`_ model [3]_ [4]_ is a canonical example of bootstrap
aggregating. For this approach, each of the `n` learners is a different
decision tree. In addition to training each decision tree on a different
bootstrapped dataset, random forests employ a `random subspace`_ approach [5]_:
each decision tree is trained on a subsample (without replacement) of the full
collection of dataset features.

.. _`Bootstrap aggregating`: https://en.wikipedia.org/wiki/Bootstrap_aggregating
.. _`random forest`: https://en.wikipedia.org/wiki/Random_forest
.. _`ensembling approach`: https://en.wikipedia.org/wiki/Ensemble_learning
.. _`random subspace`: https://en.wikipedia.org/wiki/Random_subspace_method

**Models**

- :class:`~numpy_ml.trees.RandomForest`

**References**

.. [2] Breiman, L. (1994). "Bagging predictors". *Technical Report 421.
   Statistics Department, UC Berkeley*.
.. [3] Ho, T. K. (1995). "Random decision forests". *Proceedings of the Third
   International Conference on Document Analysis and Recognition, 1*: 278-282.
.. [4] Breiman, L. (2001). "Random forests". *Machine Learning. 45(1)*: 5-32.
.. [5] Ho, T. K. (1998). "The random subspace method for constructing decision
   forests". *IEEE Transactions on Pattern Analysis and Machine Intelligence.
   20(8)*: 832-844.

.. raw:: html

   <h2>Gradient Boosting</h2>

`Gradient boosting`_ [6]_ [7]_ [8]_ is another popular `ensembling technique`_
that proceeds by iteratively fitting a sequence of `m` weak learners such that:

.. math::

    f_m(X) = b(X) + \eta w_1 g_1 + \ldots + \eta w_m g_m

where `b` is a fixed initial estimate for the targets, :math:`\eta` is
a learning rate parameter, and :math:`w_{i}` and :math:`g_{i}`
denote the weights and predictions of the :math:`i^{th}` learner.

At each training iteration a new weak learner is fit to predict the negative
gradient of the loss with respect to the previous prediction,
:math:`-\nabla_{f_{i-1}} \mathcal{L}(y, \ f_{i-1}(X))`.  We then use the
element-wise product of the predictions of this weak learner, :math:`g_i`, with
a weight, :math:`w_i`, computed via, e.g., `line-search`_ on the objective
:math:`w_i = \arg \min_{w} \sum_{j=1}^n \mathcal{L}(y_j, f_{i-1}(x_j) + w g_i)`
, to adjust the predictions of the model from the previous iteration,
:math:`f_{i-1}(X)`:

.. math::

    f_i(X) := f_{i-1}(X) + \eta w_i g_i

The current module implements gradient boosting using decision trees as the
weak learners.

.. _`Gradient boosting`: https://en.wikipedia.org/wiki/Gradient_boosting
.. _`ensembling technique`: https://en.wikipedia.org/wiki/Ensemble_learning
.. _`line-search`: https://en.wikipedia.org/wiki/Line_search

**Models**

- :class:`~numpy_ml.trees.GradientBoostedDecisionTree`

**References**

.. [6]  Breiman, L. (1997). "Arcing the edge". *Technical Report 486.
   Statistics Department, UC Berkeley*.
.. [7] Friedman, J. H. (1999). "Greedy function approximation: A gradient
   boosting machine". *IMS 1999 Reitz Lecture*.
.. [8]  Mason, L., Baxter, J., Bartlett, P. L., Frean, M. (1999). "Boosting
   algorithms as gradient descent" *Advances in Neural Information Processing
   Systems, 12*: 512–518.

.. toctree::
   :maxdepth: 3
   :hidden:

   numpy_ml.trees.dt

   numpy_ml.trees.rf

   numpy_ml.trees.gbdt


================================================
FILE: docs/numpy_ml.utils.data_structures.rst
================================================
Data structures
================

``BallTree``
------------

.. autoclass:: numpy_ml.utils.data_structures.BallTree
	:members:
	:undoc-members:
	:inherited-members:

``DiscreteSampler``
-------------------

.. autoclass:: numpy_ml.utils.data_structures.DiscreteSampler
	:members:
	:undoc-members:
	:inherited-members:

``PriorityQueue``
-----------------

.. autoclass:: numpy_ml.utils.data_structures.PriorityQueue
	:members:
	:undoc-members:
	:inherited-members:

``PQNode``
-----------------

.. autoclass:: numpy_ml.utils.data_structures.PQNode
	:members:
	:undoc-members:
	:inherited-members:

``Dict``
--------

.. autoclass:: numpy_ml.utils.data_structures.Dict
	:members:
	:undoc-members:
        :show-inheritance:


================================================
FILE: docs/numpy_ml.utils.distance_metrics.rst
================================================
Distance metrics
================

Common distance functions.

``euclidean``
---------------
.. autofunction:: numpy_ml.utils.distance_metrics.euclidean

``chebyshev``
---------------
.. autofunction:: numpy_ml.utils.distance_metrics.chebyshev

``hamming``
-------------
.. autofunction:: numpy_ml.utils.distance_metrics.hamming

``manhattan``
--------------
.. autofunction:: numpy_ml.utils.distance_metrics.manhattan

``minkowski``
--------------
.. autofunction:: numpy_ml.utils.distance_metrics.minkowski


================================================
FILE: docs/numpy_ml.utils.graphs.rst
================================================
Graphs
======

``Graph``
---------
.. autoclass:: numpy_ml.utils.graphs.Graph
    :members:
    :undoc-members:
    :inherited-members:

``Edge``
--------
.. autoclass:: numpy_ml.utils.graphs.Edge
    :members:
    :undoc-members:
    :inherited-members:

``DiGraph``
-----------
.. autoclass:: numpy_ml.utils.graphs.DiGraph
    :members:
    :undoc-members:
    :show-inheritance:

``UndirectedGraph``
-------------------
.. autoclass:: numpy_ml.utils.graphs.UndirectedGraph
    :members:
    :undoc-members:
    :show-inheritance:

``random_unweighted_graph``
---------------------------

.. autofunction:: numpy_ml.utils.graphs.random_unweighted_graph

``random_DAG``
--------------

.. autofunction:: numpy_ml.utils.graphs.random_DAG


================================================
FILE: docs/numpy_ml.utils.kernels.rst
================================================
Kernels
=======

A collection of common kernel / similarity functions. All kernels are
continuous, bounded, and symmetric real functions which integrate to 1.

``LinearKernel``
----------------

.. autoclass:: numpy_ml.utils.kernels.LinearKernel
    :members:
    :undoc-members:
    :inherited-members:

``PolynomialKernel``
--------------------

.. autoclass:: numpy_ml.utils.kernels.PolynomialKernel
    :members:
    :undoc-members:
    :inherited-members:

``RBFKernel``
-------------

.. autoclass:: numpy_ml.utils.kernels.RBFKernel
    :members:
    :undoc-members:
    :inherited-members:


================================================
FILE: docs/numpy_ml.utils.rst
================================================
Utilities
#########

.. toctree::
   :maxdepth: 3

   numpy_ml.utils.data_structures

   numpy_ml.utils.distance_metrics

   numpy_ml.utils.graphs

   numpy_ml.utils.kernels

   numpy_ml.utils.windows

   numpy_ml.utils.testing


================================================
FILE: docs/numpy_ml.utils.testing.rst
================================================
Testing
-------
Common helper functions for testing the ML algorithms in the rest of the repo.

.. automodule:: numpy_ml.utils.testing
    :members:
    :undoc-members:
    :inherited-members:
    :show-inheritance:


================================================
FILE: docs/numpy_ml.utils.windows.rst
================================================
Window functions
================
In digital signal processing, windowing functions are useful to counteract the
assumption made by the FFT that data is infinite and to reduce spectral
leakage.

``blackman_harris``
-------------------

.. autofunction:: numpy_ml.utils.windows.blackman_harris

``generalized_cosine``
----------------------

.. autofunction:: numpy_ml.utils.windows.generalized_cosine

``hamming``
-----------

.. autofunction:: numpy_ml.utils.windows.hamming

``hann``
-----------

.. autofunction:: numpy_ml.utils.windows.hann


================================================
FILE: docs/requirements.txt
================================================
numpy
scipy

# all this is for the dang tests
matplotlib
seaborn
pandas
sklearn
huffman


================================================
FILE: numpy_ml/README.md
================================================
# Models
This repo includes code for the following models:

1. **Gaussian mixture model**
    - EM training

2. **Hidden Markov model**
    - Viterbi decoding
    - Likelihood computation
    - MLE parameter estimation via Baum-Welch/forward-backward algorithm

3. **Latent Dirichlet allocation** (topic model)
    - Standard model with MLE parameter estimation via variational EM
    - Smoothed model with MAP parameter estimation via MCMC

4. **Neural networks**
    * Layers / Layer-wise ops
        - Add
        - Flatten
        - Multiply
        - Softmax
        - Fully-connected/Dense
        - Sparse evolutionary connections
        - LSTM
        - Elman-style RNN
        - Max + average pooling
        - Dot-product attention
        - Embedding layer
        - Restricted Boltzmann machine (w. CD-n training)
        - 2D deconvolution (w. padding and stride)
        - 2D convolution (w. padding, dilation, and stride)
        - 1D convolution (w. padding, dilation, stride, and causality)
    * Modules
        - Bidirectional LSTM
        - ResNet-style residual blocks (identity and convolution)
        - WaveNet-style residual blocks with dilated causal convolutions
        - Transformer-style multi-headed scaled dot product attention
    * Regularizers
        - Dropout
    * Normalization
        - Batch normalization (spatial and temporal)
        - Layer normalization (spatial and temporal)
    * Optimizers
        - SGD w/ momentum
        - AdaGrad
        - RMSProp
        - Adam
    * Learning Rate Schedulers
        - Constant
        - Exponential
        - Noam/Transformer
        - Dlib scheduler
    * Weight Initializers
        - Glorot/Xavier uniform and normal
        - He/Kaiming uniform and normal
        - Standard and truncated normal
    * Losses
        - Cross entropy
        - Squared error
        - Bernoulli VAE loss
        - Wasserstein loss with gradient penalty
        - Noise contrastive estimation loss
    * Activations
        - ReLU
        - Tanh
        - Affine
        - Sigmoid
        - Leaky ReLU
        - ELU
        - SELU
        - Exponential
        - Hard Sigmoid
        - Softplus
    * Models
        - Bernoulli variational autoencoder
        - Wasserstein GAN with gradient penalty
        - word2vec encoder with skip-gram and CBOW architectures
    * Utilities
        - `col2im` (MATLAB port)
        - `im2col` (MATLAB port)
        - `conv1D`
        - `conv2D`
        - `deconv2D`
        - `minibatch`

5. **Tree-based models**
    - Decision trees (CART)
    - [Bagging] Random forests
    - [Boosting] Gradient-boosted decision trees

6. **Linear models**
    - Ridge regression
    - Logistic regression
    - Ordinary least squares
    - Gaussian naive Bayes classifier
    - Generalized linear model (identity, log, and logit links)
    - Bayesian linear regression w/ conjugate priors
        - Unknown mean, known variance (Gaussian prior)
        - Unknown mean, unknown variance (Normal-Gamma / Normal-Inverse-Wishart prior)

7. **n-Gram sequence models**
    - Maximum likelihood scores
    - Additive/Lidstone smoothing
    - Simple Good-Turing smoothing

8. **Multi-armed bandit models**
    - UCB1
    - LinUCB
    - Epsilon-greedy
    - Thompson sampling w/ conjugate priors
        - Beta-Bernoulli sampler

8. **Reinforcement learning models**
    - Cross-entropy method agent
    - First visit on-policy Monte Carlo agent
    - Weighted incremental importance sampling Monte Carlo agent
    - Expected SARSA agent
    - TD-0 Q-learning agent
    - Dyna-Q / Dyna-Q+ with prioritized sweeping

9. **Nonparametric models**
    - Nadaraya-Watson kernel regression
    - k-Nearest neighbors classification and regression
    - Gaussian process regression

10. **Matrix factorization**
    - Regularized alternating least-squares
    - Non-negative matrix factorization

11. **Preprocessing**
    - Discrete Fourier transform (1D signals)
    - Discrete cosine transform (type-II) (1D signals)
    - Bilinear interpolation (2D signals)
    - Nearest neighbor interpolation (1D and 2D signals)
    - Autocorrelation (1D signals)
    - Signal windowing
    - Text tokenization
    - Feature hashing
    - Feature standardization
    - One-hot encoding / decoding
    - Huffman coding / decoding
    - Byte pair encoding / decoding
    - Term frequency-inverse document frequency (TF-IDF) encoding
    - MFCC encoding

12. **Utilities**
    - Similarity kernels
    - Distance metrics
    - Priority queue
    - Ball tree
    - Discrete sampler
    - Graph processing and generators


================================================
FILE: numpy_ml/__init__.py
================================================
# noqa
"""Common ML and ML-adjacent algorithms implemented in NumPy"""

from . import utils
from . import preprocessing

from . import gmm
from . import hmm
from . import lda
from . import linear_models
from . import neural_nets
from . import ngram
from . import nonparametric
from . import rl_models
from . import trees
from . import bandits
from . import factorization


================================================
FILE: numpy_ml/bandits/README.md
================================================
# Bandits
The `bandits.py` module includes several simple multi-armed bandit
environments.

The `policies.py` module implements a number of standard multi-arm bandit
policies.

1. **Bandits**
    - MAB: Bernoulli, Multinomial, and Gaussian payout distributions
    - Contextual MAB: Linear contextual bandits

2. **Policies**
    - Epsilon-greedy
    - UCB1 ([Auer, Cesa-Bianchi, & Fischer, 2002](https://link.springer.com/content/pdf/10.1023/A:1013689704352.pdf))
    - Conjugate Thompson sampler for Bernoulli bandits ([Thompson, 1933](https://www.gwern.net/docs/statistics/decision/1933-thompson.pdf); [Chapelle & Li, 2011](https://papers.nips.cc/paper/4321-an-empirical-evaluation-of-thompson-sampling.pdf))
    - LinUCB ([Li, Chu, Langford, & Schapire, 2010](http://rob.schapire.net/papers/www10.pdf))

## Plots
<p align="center">
<img src="img/ThompsonSamplingBetaBinomial.png" align='center' height="400" />

<img src="img/UCB1.png" align='center' height="400" />

<img src="img/EpsilonGreedy.png" align='center' height="400" />
</p>


================================================
FILE: numpy_ml/bandits/__init__.py
================================================
from .bandits import *
from . import policies
from . import trainer


================================================
FILE: numpy_ml/bandits/bandits.py
================================================
"""A module containing different variations on multi-armed bandit environments."""

from abc import ABC, abstractmethod

import numpy as np

from numpy_ml.utils.testing import random_one_hot_matrix, is_number


class Bandit(ABC):
    def __init__(self, rewards, reward_probs, context=None):
        """
        Abstract base class for multi-armed bandit environments.

        Parameters
        ----------
        rewards : sequence of length `K`
            One entry per arm. The base class only uses the length of this
            sequence to determine the number of arms.
        reward_probs : sequence of length `K`
            One entry per arm. Must have the same length as ``rewards``.
        context : object or None
            Unused by the base class. Default is None.
        """
        assert len(rewards) == len(reward_probs)
        self.step = 0
        self.n_arms = len(rewards)

        super().__init__()

    def __repr__(self):
        """A string representation for the bandit"""
        HP = self.hyperparameters
        # fall back to the class name when a subclass does not define an "id"
        bandit_id = HP.get("id", type(self).__name__)
        params = ", ".join(["{}={}".format(k, v) for (k, v) in HP.items() if k != "id"])
        return "{}({})".format(bandit_id, params)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {}

    @abstractmethod
    def oracle_payoff(self, context=None):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
            The current context matrix for each of the bandit arms, if
            applicable. Default is None.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        """
        pass

    def pull(self, arm_id, context=None):
        """
        "Pull" (i.e., sample from) a given arm's payoff distribution.

        Parameters
        ----------
        arm_id : int
            The integer ID of the arm to sample from. Must satisfy
            ``0 <= arm_id < n_arms``.
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D,)` or None
            The context vector for the current timestep if this is a contextual
            bandit. Otherwise, this argument is unused and defaults to None.

        Returns
        -------
        reward : float
            The reward sampled from the given arm's payoff distribution
        """
        # reject negative ids too: they would silently wrap around when a
        # subclass indexes its per-arm arrays
        assert 0 <= arm_id < self.n_arms

        self.step += 1
        return self._pull(arm_id, context)

    def reset(self):
        """Reset the bandit step counter to zero."""
        self.step = 0

    @abstractmethod
    def _pull(self, arm_id, context):
        """
        Sample a reward for arm `arm_id`. Called by :meth:`pull`, which always
        passes both the arm ID and the (possibly None) context.
        """
        pass


class MultinomialBandit(Bandit):
    def __init__(self, payoffs, payoff_probs):
        """
        A multi-armed bandit where each arm is associated with a different
        multinomial payoff distribution.

        Parameters
        ----------
        payoffs : ragged list of length `K`
            The payoff values for each of the `K` arms. ``payoffs[k][i]``
            holds the `i` th payoff value for arm `k`.
        payoff_probs : ragged list of length `K`
            A list of the probabilities associated with each of the payoff
            values in ``payoffs``. ``payoff_probs[k][i]`` holds the probability
            of payoff index `i` for arm `k`.
        """
        super().__init__(payoffs, payoff_probs)

        for r, rp in zip(payoffs, payoff_probs):
            assert len(r) == len(rp)
            np.testing.assert_almost_equal(sum(rp), 1.0)

        payoffs = self._stack_arms(payoffs)
        payoff_probs = self._stack_arms(payoff_probs)

        self.payoffs = payoffs
        self.payoff_probs = payoff_probs
        self.arm_evs = np.array([sum(p * v) for p, v in zip(payoff_probs, payoffs)])
        self.best_ev = np.max(self.arm_evs)
        self.best_arm = np.argmax(self.arm_evs)

    @staticmethod
    def _stack_arms(rows):
        """
        Convert a list of per-arm sequences into an array with one entry per arm.

        Equal-length rows are stacked into a regular 2D array. Ragged rows are
        stored in a 1D object array, because recent NumPy versions refuse to
        build ragged arrays implicitly.
        """
        arms = [np.array(r) for r in rows]
        if len({a.shape for a in arms}) <= 1:
            return np.array(arms)

        ragged = np.empty(len(arms), dtype=object)
        for ix, arm in enumerate(arms):
            ragged[ix] = arm
        return ragged

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "MultinomialBandit",
            "payoffs": self.payoffs,
            "payoff_probs": self.payoff_probs,
        }

    def oracle_payoff(self, context=None):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
            Unused. Default is None.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : int
            The arm ID with the largest expected reward.
        """
        return self.best_ev, self.best_arm

    def _pull(self, arm_id, context):
        """Sample one payoff value for arm `arm_id`; `context` is unused."""
        payoffs = self.payoffs[arm_id]
        probs = self.payoff_probs[arm_id]
        return np.random.choice(payoffs, p=probs)


class BernoulliBandit(Bandit):
    def __init__(self, payoff_probs):
        """
        A multi-armed bandit where each arm is associated with an independent
        Bernoulli payoff distribution.

        Parameters
        ----------
        payoff_probs : list of length `K`
            A list of the payoff probability for each arm. ``payoff_probs[k]``
            holds the probability of payoff for arm `k`.
        """
        # every arm pays out a fixed reward of 1 when it pays out at all
        payoffs = [1] * len(payoff_probs)
        super().__init__(payoffs, payoff_probs)

        for p in payoff_probs:
            assert p >= 0 and p <= 1

        self.payoffs = np.array(payoffs)
        self.payoff_probs = np.array(payoff_probs)

        # with a unit payoff, the expected value of an arm is its payoff probability
        self.arm_evs = self.payoff_probs
        self.best_ev = np.max(self.arm_evs)
        self.best_arm = np.argmax(self.arm_evs)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "BernoulliBandit",
            "payoff_probs": self.payoff_probs,
        }

    def oracle_payoff(self, context=None):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
            Unused. Default is None.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : int
            The arm ID with the largest expected reward.
        """
        return self.best_ev, self.best_arm

    def _pull(self, arm_id, context):
        """Sample a 0/1 reward for arm `arm_id`; `context` is unused."""
        # `np.random.rand` draws from [0, 1), so a strict inequality gives
        # P(reward = 1) = p exactly, including the p = 0 and p = 1 edge cases
        return int(np.random.rand() < self.payoff_probs[arm_id])


class GaussianBandit(Bandit):
    def __init__(self, payoff_dists, payoff_probs):
        """
        A multi-armed bandit that is similar to
        :class:`BernoulliBandit`, but instead of each arm having
        a fixed payout of 1, the payoff values are sampled from independent
        Gaussian RVs.

        Parameters
        ----------
        payoff_dists : list of 2-tuples of length `K`
            The parameters the distributions over payoff values for each of the
            `K` arms. Specifically, ``payoff_dists[k]`` is a tuple of (mean,
            variance) for the Gaussian distribution over payoffs associated
            with arm `k`.
        payoff_probs : list of length `K`
            ``payoff_probs[k]`` holds the probability that arm `k` pays out at
            all on a given pull. When it does not pay out the reward is 0.
        """
        super().__init__(payoff_dists, payoff_probs)

        for (mean, var), rp in zip(payoff_dists, payoff_probs):
            assert var > 0
            # each entry is a single payout probability, not a distribution
            assert 0 <= rp <= 1

        self.payoff_dists = payoff_dists
        self.payoff_probs = payoff_probs

        # E[reward] = P(payout) * E[payout], since the reward is 0 otherwise
        self.arm_evs = np.array(
            [p * mu for (mu, var), p in zip(payoff_dists, payoff_probs)]
        )
        self.best_ev = np.max(self.arm_evs)
        self.best_arm = np.argmax(self.arm_evs)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "GaussianBandit",
            "payoff_dists": self.payoff_dists,
            "payoff_probs": self.payoff_probs,
        }

    def _pull(self, arm_id, context):
        """Sample a reward for arm `arm_id`; `context` is unused."""
        mean, var = self.payoff_dists[arm_id]

        reward = 0
        if np.random.rand() < self.payoff_probs[arm_id]:
            # `np.random.normal` expects a standard deviation, not a variance
            reward = np.random.normal(mean, np.sqrt(var))

        return reward

    def oracle_payoff(self, context=None):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
            Unused. Default is None.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : int
            The arm ID with the largest expected reward.
        """
        return self.best_ev, self.best_arm


class ShortestPathBandit(Bandit):
    def __init__(self, G, start_vertex, end_vertex):
        """
        A multi-armed bandit view of the shortest path problem on a weighted
        graph.

        Notes
        -----
        Every path from the start vertex to the end vertex is treated as one
        arm. The reward for pulling an arm is the negated sum of the edge
        weights along that path, so maximizing reward corresponds to
        minimizing path weight.

        Parameters
        ----------
        G : :class:`Graph <numpy_ml.utils.graphs.Graph>` instance
            A weighted graph object. Weights can be fixed or probabilistic.
        start_vertex : int
            The index of the path's start vertex in the graph
        end_vertex : int
            The index of the path's end vertex in the graph
        """
        self.G = G
        self.end_vertex = end_vertex
        self.adj_dict = G.to_adj_dict()
        self.start_vertex = start_vertex
        self.paths = G.all_paths(start_vertex, end_vertex)

        # expected (negated) weight of every candidate path
        self.arm_evs = self._calc_arm_evs()
        self.best_ev = np.max(self.arm_evs)
        self.best_arm = np.argmax(self.arm_evs)

        # the base class only needs one dummy entry per arm
        dummy = [None] * len(self.paths)
        super().__init__(dummy, dummy)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "ShortestPathBandit",
            "G": self.G,
            "end_vertex": self.end_vertex,
            "start_vertex": self.start_vertex,
        }

    def oracle_payoff(self, context=None):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)` or None
            Unused. Default is None.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : float
            The arm ID with the largest expected reward.
        """
        return self.best_ev, self.best_arm

    def _calc_arm_evs(self):
        """Return the negated total edge weight of each path in `self.paths`."""
        to_vertex = self.G.get_vertex
        arm_evs = np.zeros(len(self.paths))
        for arm, route in enumerate(self.paths):
            for src, dst in zip(route[:-1], route[1:]):
                # first outgoing edge of `src` that lands on the next vertex
                matches = [e for e in self.adj_dict[src] if e.to == to_vertex(dst)]
                arm_evs[arm] -= matches[0].weight
        return arm_evs

    def _pull(self, arm_id, context):
        """Traverse path `arm_id` and return the negated sum of its edge weights."""
        to_vertex = self.G.get_vertex
        route = self.paths[arm_id]
        total = 0
        for src, dst in zip(route[:-1], route[1:]):
            matches = [e for e in self.adj_dict[src] if e.to == to_vertex(dst)]
            total -= matches[0].weight
        return total


class ContextualBernoulliBandit(Bandit):
    def __init__(self, context_probs):
        """
        A contextual version of :class:`BernoulliBandit` where each binary
        context feature is associated with an independent Bernoulli payoff
        distribution.

        Parameters
        ----------
        context_probs : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)`
            A matrix of the payoff probabilities associated with each of the
            `D` context features, for each of the `K` arms. Index `(i, j)`
            contains the probability of payoff for arm `j` under context `i`.
        """
        D, K = context_probs.shape

        # use a dummy placeholder variable to initialize the Bandit superclass
        placeholder = [None] * K
        super().__init__(placeholder, placeholder)

        self.context_probs = context_probs
        self.arm_evs = self.context_probs
        # best expected payoff / best arm for each of the D possible contexts
        self.best_evs = self.arm_evs.max(axis=1)
        self.best_arms = self.arm_evs.argmax(axis=1)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "ContextualBernoulliBandit",
            "context_probs": self.context_probs,
        }

    def get_context(self):
        """
        Sample a random one-hot context vector. This vector will be the same
        for all arms.

        Returns
        -------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)`
            A random `D`-dimensional one-hot context vector repeated for each
            of the `K` bandit arms.
        """
        D, K = self.context_probs.shape
        context = np.zeros((D, K))
        context[np.random.choice(D), :] = 1
        # return the (D, K) matrix: `oracle_payoff` and `_pull` both index the
        # context by column, so a flat (D,) vector would break them
        return context

    def oracle_payoff(self, context):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)`
            The current context matrix for each of the bandit arms.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : int
            The arm ID with the largest expected reward.
        """
        # every column holds the same one-hot vector, so column 0 identifies
        # the active context feature
        context_id = context[:, 0].argmax()
        return self.best_evs[context_id], self.best_arms[context_id]

    def _pull(self, arm_id, context):
        """Sample a 0/1 reward for arm `arm_id` under the given context matrix."""
        D, K = self.context_probs.shape
        # one-hot context column @ (D, K) probabilities -> payoff prob per arm
        arm_probs = context[:, arm_id] @ self.context_probs
        # `np.random.rand` draws from [0, 1); a strict inequality keeps arms
        # with probability 0 from ever paying out
        arm_rwds = (np.random.rand(K) < arm_probs).astype(int)
        return arm_rwds[arm_id]


class ContextualLinearBandit(Bandit):
    def __init__(self, K, D, payoff_variance=1):
        r"""
        A contextual linear multi-armed bandit.

        Notes
        -----
        In a contextual linear bandit the expected payoff of an arm :math:`a
        \in \mathcal{A}` at time `t` is a linear combination of its context
        vector :math:`\mathbf{x}_{t,a}` with a coefficient vector
        :math:`\theta_a`:

        .. math::

            \mathbb{E}[r_{t, a} \mid \mathbf{x}_{t, a}] = \mathbf{x}_{t,a}^\top \theta_a

        In this implementation, the arm coefficient vectors :math:`\theta` are
        initialized independently from a uniform distribution on the interval
        [-1, 1] and then rescaled by the 2-norm of the full coefficient matrix.
        The specific reward at timestep `t` is normally distributed:

        .. math::

            r_{t, a} \mid \mathbf{x}_{t, a} \sim
                \mathcal{N}(\mathbf{x}_{t,a}^\top \theta_a, \sigma_a^2)

        Parameters
        ----------
        K : int
            The number of bandit arms
        D : int
            The dimensionality of the context vectors
        payoff_variance : float or :py:class:`ndarray <numpy.ndarray>` of shape `(K,)`
            The variance of the random noise in the arm payoffs. If a float,
            the variance is assumed to be equal for each arm. Default is 1.
        """
        if is_number(payoff_variance):
            payoff_variance = [payoff_variance] * K

        assert len(payoff_variance) == K
        assert all(v > 0 for v in payoff_variance)

        self.K = K
        self.D = D
        self.payoff_variance = payoff_variance

        # use a dummy placeholder variable to initialize the Bandit superclass
        placeholder = [None] * K
        super().__init__(placeholder, placeholder)

        # initialize the theta matrix
        self.thetas = np.random.uniform(-1, 1, size=(D, K))
        self.thetas /= np.linalg.norm(self.thetas, 2)

    @property
    def hyperparameters(self):
        """A dictionary of the bandit hyperparameters"""
        return {
            "id": "ContextualLinearBandit",
            "K": self.K,
            "D": self.D,
            "payoff_variance": self.payoff_variance,
        }

    @property
    def parameters(self):
        """A dictionary of the current bandit parameters"""
        return {"thetas": self.thetas}

    def get_context(self):
        """
        Sample the context vectors for each arm from a multivariate standard
        normal distribution.

        Returns
        -------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)`
            A `D`-dimensional context vector sampled from a standard normal
            distribution for each of the `K` bandit arms.
        """
        return np.random.normal(size=(self.D, self.K))

    def _expected_payoffs(self, context):
        """Return the length-`K` vector of expected payoffs x_a^T theta_a."""
        thetas = self.thetas
        return np.array([context[:, k] @ thetas[:, k] for k in range(self.K)])

    def oracle_payoff(self, context):
        """
        Return the expected reward for an optimal agent.

        Parameters
        ----------
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D, K)`
            The current context matrix for each of the bandit arms.

        Returns
        -------
        optimal_rwd : float
            The expected reward under an optimal policy.
        optimal_arm : int
            The arm ID with the largest expected reward.
        """
        # compute the expected payoffs from the supplied context rather than
        # relying on values cached by a previous call to `pull`
        arm_evs = self._expected_payoffs(context)
        best_arm = np.argmax(arm_evs)
        return arm_evs[best_arm], best_arm

    def _pull(self, arm_id, context):
        """Sample a noisy linear reward for arm `arm_id` under `context`."""
        # `np.random.normal` takes a standard deviation, so convert the
        # per-arm variances before sampling the noise
        self._noise = np.random.normal(
            scale=np.sqrt(self.payoff_variance), size=self.K
        )
        # cache the noiseless payoffs for the most recent context
        self.arm_evs = self._expected_payoffs(context)
        return (self.arm_evs + self._noise)[arm_id]


================================================
FILE: numpy_ml/bandits/policies.py
================================================
"""A module containing exploration policies for various multi-armed bandit problems."""

from abc import ABC, abstractmethod
from collections import defaultdict

import numpy as np

from ..utils.testing import is_number


class BanditPolicyBase(ABC):
    def __init__(self):
        """A simple base class for multi-armed bandit policies"""
        self.step = 0
        self.ev_estimates = {}
        self.is_initialized = False
        super().__init__()

    def __repr__(self):
        """Return a string representation of the policy"""
        HP = self.hyperparameters
        params = ", ".join(["{}={}".format(k, v) for (k, v) in HP.items() if k != "id"])
        return "{}({})".format(HP["id"], params)

    @property
    def hyperparameters(self):
        """A dictionary containing the policy hyperparameters"""
        pass

    @property
    def parameters(self):
        """A dictionary containing the current policy parameters"""
        pass

    def act(self, bandit, context=None):
        """
        Select an arm and sample from its payoff distribution.

        Parameters
        ----------
        bandit : :class:`Bandit <numpy_ml.bandits.bandits.Bandit>` instance
            The multi-armed bandit to act upon
        context : :py:class:`ndarray <numpy.ndarray>` of shape `(D,)` or None
            The context vector for the current timestep if interacting with a
            contextual bandit. Otherwise, this argument is unused. Default is
            None.

        Returns
        -------
        rwd : float
            The reward received after pulling ``arm_id``.
        arm_id : int
            The arm that was pulled to generate ``rwd``.
        """
        # parameters that depend on the bandit are created lazily on first use
        if not self.is_initialized:
            self._initialize_params(bandit)

        arm_id = self._select_arm(bandit, context)
        rwd = self._pull_arm(bandit, arm_id, context)
        self._update_params(arm_id, rwd, context)
        return rwd, arm_id

    def reset(self):
        """Reset the policy parameters and counters to their initial states."""
        self.step = 0
        self._reset_params()
        self.is_initialized = False

    def _pull_arm(self, bandit, arm_id, context):
        """Execute a bandit action and return the received reward."""
        self.step += 1
        return bandit.pull(arm_id, context)

    @abstractmethod
    def _select_arm(self, bandit, context):
        """Select an arm based on the current context"""
        pass

    @abstractmethod
    def _update_params(self, arm_id, rwd, context):
        """
        Update the policy parameters after an interaction. Called by
        :meth:`act` with the pulled arm, the reward it produced, and the
        context that was in effect.
        """
        pass

    @abstractmethod
    def _initialize_params(self, bandit):
        """
        Initialize any policy-specific parameters that depend on information
        from the bandit environment.
        """
        pass

    @abstractmethod
    def _reset_params(self):
        """
        Reset any model-specific parameters. This gets called within the
        public `self.reset()` method.
        """
        pass


class EpsilonGreedy(BanditPolicyBase):
    def __init__(self, epsilon=0.05, ev_prior=0.5):
        r"""
        An epsilon-greedy exploration policy for multi-armed bandits.

        Notes
        -----
        With probability :math:`\epsilon` the policy picks an arm uniformly at
        random; otherwise it picks the arm whose current payoff estimate is
        largest. The resulting action distribution is:

        .. math::

            P(a) = \left\{
                 \begin{array}{lr}
                   \epsilon / N + (1 - \epsilon) &\text{if }
                        a = \arg \max_{a' \in \mathcal{A}}
                            \mathbb{E}_{q_{\hat{\theta}}}[r \mid a']\\
                   \epsilon / N &\text{otherwise}
                 \end{array}
               \right.

        Here :math:`N = |\mathcal{A}|` is the number of arms,
        :math:`q_{\hat{\theta}}` is the estimated arm payoff distribution under
        the current parameters :math:`\hat{\theta}`, and
        :math:`\mathbb{E}_{q_{\hat{\theta}}}[r \mid a']` is the expected reward
        `r` under that estimate after taking action :math:`a'`.

        Parameters
        ----------
        epsilon : float in [0, 1]
            The probability of taking a random action. Default is 0.05.
        ev_prior : float
            The starting expected payoff for each arm before any data has been
            observed. Default is 0.5.
        """
        super().__init__()
        self.pull_counts = defaultdict(lambda: 0)
        self.ev_prior = ev_prior
        self.epsilon = epsilon

    @property
    def parameters(self):
        """A dictionary containing the current policy parameters"""
        return {"ev_estimates": self.ev_estimates}

    @property
    def hyperparameters(self):
        """A dictionary containing the policy hyperparameters"""
        return {
            "id": "EpsilonGreedy",
            "epsilon": self.epsilon,
            "ev_prior": self.ev_prior,
        }

    def _initialize_params(self, bandit):
        """
        Seed the payoff estimate of every arm in `bandit` with the prior and
        mark the policy as initialized.
        """
        self.ev_estimates = dict.fromkeys(range(bandit.n_arms), self.ev_prior)
        self.is_initialized = True

    def _select_arm(self, bandit, context=None):
        """Explore with probability `epsilon`, otherwise exploit the best estimate."""
        explore = np.random.rand() < self.epsilon
        if explore:
            return np.random.choice(bandit.n_arms)

        # ties are broken in favor of the arm that appears first
        estimates = self.ev_estimates
        return max(estimates, key=estimates.get)

    def _update_params(self, arm_id, reward, context=None):
        """Fold `reward` into the running mean payoff estimate for `arm_id`."""
        self.pull_counts[arm_id] += 1
        n_pulls = self.pull_counts[arm_id]
        previous = self.ev_estimates[arm_id]
        self.ev_estimates[arm_id] = previous + (reward - previous) / n_pulls

    def _reset_params(self):
        """
        Discard the payoff estimates and pull counts. Invoked by the public
        `self.reset()` method.
        """
        self.pull_counts = defaultdict(lambda: 0)
        self.ev_estimates = {}


class UCB1(BanditPolicyBase):
    def __init__(self, C=1, ev_prior=0.5):
        r"""
        A UCB1 policy for multi-armed bandit problems.

        Notes
        -----
        The UCB1 algorithm [*]_ guarantees the cumulative regret is bounded by log
        `t`, where `t` is the current timestep. To make this guarantee UCB1
        assumes all arm payoffs are between 0 and 1.

        Under UCB1, the upper confidence bound on the expected value for
        pulling arm `a` at timestep `t` is:

        .. math::

            \text{UCB}(a, t) = \text{EV}_t(a) + C \sqrt{\frac{2 \log t}{N_t(a)}}

        where :math:`\text{EV}_t(a)` is the average of the rewards received so
        far from pulling arm `a`, `C` is a free parameter controlling the
        "optimism" of the confidence upper bound for :math:`\text{UCB}(a, t)`
        (for logarithmic regret bounds, `C` must equal 1), and :math:`N_t(a)`
        is the number of times arm `a` has been pulled during the previous `t -
        1` timesteps.

        References
        ----------
        .. [*] Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002). Finite-time
           analysis of the multiarmed bandit problem. *Machine Learning,
           47(2)*.

        Parameters
        ----------
        C : float in (0, +infinity)
            A confidence/optimism parameter affecting the degree of
            exploration, where larger values encourage greater exploration. The
            UCB1 algorithm assumes `C=1`. Default is 1.
        ev_prior : float
            The starting expected value for each arm before any data has been
            observed. Default is 0.5.
        """
        self.C = C
        self.ev_prior = ev_prior
        # per-arm pull counters; must exist before the first call to `act`
        self.pull_counts = defaultdict(lambda: 0)
        super().__init__()

    @property
    def parameters(self):
        """A dictionary containing the current policy parameters"""
        return {"ev_estimates": self.ev_estimates}

    @property
    def hyperparameters(self):
        """A dictionary containing the policy hyperparameters"""
        return {
            "C": self.C,
            "id": "UCB1",
            "ev_prior": self.ev_prior,
        }

    def _initialize_params(self, bandit):
        """
        Initialize any policy-specific parameters that depend on information
        from the bandit environment.
        """
        self.ev_estimates = {i: self.ev_prior for i in range(bandit.n_arms)}
        self.is_initialized = True

    def _select_arm(self, bandit, context=None):
        """Return the arm with the largest upper confidence bound."""
        # add eps to avoid divide-by-zero errors on the first pull of each arm
        eps = np.finfo(float).eps
        N, T = bandit.n_arms, self.step + 1
        E, C = self.ev_estimates, self.pull_counts
        # exploration bonus C * sqrt(2 log t / N_t(a)), as in the class docstring
        scores = [
            E[a] + self.C * np.sqrt(2 * np.log(T) / (C[a] + eps)) for a in range(N)
        ]
        return np.argmax(scores)

    def _update_params(self, arm_id, reward, context=None):
        """Fold `reward` into the running mean payoff estimate for `arm_id`."""
        E, C = self.ev_estimates, self.pull_counts
        C[arm_id] += 1
        E[arm_id] += (reward - E[arm_id]) / (C[arm_id])

    def _reset_params(self):
        """
        Reset any model-specific parameters. This gets called within the
        public :meth:`reset` method.
        """
        self.ev_estimates = {}
        self.pull_counts = defaultdict(lambda: 0)


class ThompsonSamplingBetaBinomial(BanditPolicyBase):
    def __init__(self, alpha=1, beta=1):
        r"""
        A conjugate Thompson sampling [1]_ [2]_ policy for multi-armed bandits with
        Bernoulli likelihoods.

        Notes
        -----
        The policy assumes independent Beta priors on the Bernoulli arm payoff
        probabilities, :math:`\theta`:

        .. math::

            \theta_k \sim \text{Beta}(\alpha_k, \beta_k) \\
            r \mid \theta_k \sim \text{Bernoulli}(\theta_k)

        where :math:`k \in \{1,\ldots,K \}` indexes arms in the MAB and
        :math:`\theta_k` is the parameter of the Bernoulli likelihood for arm
        `k`. The sampler begins by selecting an arm with probability
        proportional to its payoff probability under the initial Beta prior.
        After pulling the sampled arm and receiving a reward, `r`, the sampler
        computes the posterior over the model parameters (arm payoffs) via
        Bayes' rule, and then samples a new action in proportion to its payoff
        probability under this posterior. This process (i.e., sample action
        from posterior, take action and receive reward, compute updated
        posterior) is repeated until the number of trials is exhausted.

        Note that due to the conjugacy between the Beta prior and Bernoulli
        likelihood the posterior for each arm will also be Beta-distributed and
        can be computed and sampled from efficiently:

        .. math::

            \theta_k \mid r \sim \text{Beta}(\alpha_k + r, \beta_k + 1 - r)

        References
        ----------
        .. [1] Thompson, W. (1933). On the likelihood that one unknown
           probability exceeds another in view of the evidence of two samples.
           *Biometrika, 25(3/4)*, 285-294.
        .. [2] Chapelle, O., & Li, L. (2011). An empirical evaluation of
           Thompson sampling. *Advances in Neural Information Processing
           Systems, 24*, 2249-2257.

        Parameters
        ----------
        alpha : float or list of length `K`
            Parameter for the Beta prior on arm payouts. If a float, this value
            will be used in the prior for all of the `K` arms.
        beta : float or list of length `K`
            Parameter for the Beta prior on arm payouts. If a float, this value
            will be used in the prior for all of the `K` arms.
        """
        super().__init__()
        self.alphas, self.betas = [], []
        self.alpha, self.beta = alpha, beta
        self.is_initialized = False

    @property
    def parameters(self):
        """A dictionary containing the current policy parameters"""
        return {
            "ev_estimates": self.ev_estimates,
            "alphas": self.alphas,
            "betas": self.betas,
        }

    @property
    def hyperparameters(self):
        """A dictionary containing the policy hyperparameters"""
        return {
            "id": "ThompsonSamplingBetaBinomial",
            "alpha": self.alpha,
            "beta": self.beta,
        }

    def _initialize_params(self, bandit):
        """
        Build the per-arm Beta prior parameters and the initial payoff
        estimates for `bandit`, which must be a `BernoulliBandit`.
        """
        bhp = bandit.hyperparameters
        fstr = "ThompsonSamplingBetaBinomial only defined for BernoulliBandit, got: {}"
        assert bhp["id"] == "BernoulliBandit", fstr.format(bhp["id"])

        # initialize the model prior. Scalars are broadcast to every arm;
        # per-arm sequences are copied so that posterior updates never mutate
        # the hyperparameters supplied by the caller
        n_arms = bandit.n_arms
        if is_number(self.alpha):
            self.alphas = [self.alpha] * n_arms
        else:
            self.alphas = list(self.alpha)

        if is_number(self.beta):
            self.betas = [self.beta] * n_arms
        else:
            self.betas = list(self.beta)

        assert len(self.alphas) == len(self.betas) == n_arms

        self.ev_estimates = {i: self._map_estimate(i, 1) for i in range(n_arms)}
        self.is_initialized = True

    def _select_arm(self, bandit, context):
        """Sample payoff probabilities from the posterior and act greedily on them."""
        if not self.is_initialized:
            self._initialize_params(bandit)

        # draw a sample from the current model posterior
        posterior_sample = np.random.beta(self.alphas, self.betas)

        # greedily select an action based on this sample
        return np.argmax(posterior_sample)

    def _update_params(self, arm_id, rwd, context):
        """
        Compute the parameters of the Beta posterior, P(payoff prob | rwd),
        for arm `arm_id`.
        """
        self.alphas[arm_id] += rwd
        self.betas[arm_id] += 1 - rwd
        self.ev_estimates[arm_id] = self._map_estimate(arm_id, rwd)

    def _map_estimate(self, arm_id, rwd):
        """Compute the current MAP estimate for an arm's payoff probability"""
        a, b = self.alphas[arm_id], self.betas[arm_id]
        if a > 1 and b > 1:
            # unimodal with an interior mode
            return (a - 1) / (a + b - 2)
        if a == 1 and b == 1:
            # uniform: every value is equally likely, report the midpoint
            return 0.5
        if a < 1 and b < 1:
            # bimodal at 0 and 1, both equally likely: make a guess
            return rwd
        if a <= 1 and b >= 1:
            # density is non-increasing on (0, 1): mode at 0
            return 0
        if a >= 1 and b <= 1:
            # density is non-decreasing on (0, 1): mode at 1
            return 1
        # only reachable for non-comparable values such as NaN
        return 0.5

    def _reset_params(self):
        """
        Reset any model-specific parameters. This gets called within the
        public `self.reset()` method.
        """
        self.alphas, self.betas = [], []
        self.ev_estimates = {}


class LinUCB(BanditPolicyBase):
    def __init__(self, alpha=1):
        """
        A disjoint linear UCB policy [*]_ for contextual linear bandits.

        Notes
        -----
        LinUCB is only defined for :class:`ContextualLinearBandit <numpy_ml.bandits.ContextualLinearBandit>` environments.

        References
        ----------
        .. [*] Li, L., Chu, W., Langford, J., & Schapire, R. (2010). A
           contextual-bandit approach to personalized news article
           recommendation. In *Proceedings of the 19th International Conference
           on World Wide Web*, 661-670.

        Parameters
        ----------
        alpha : float
            A confidence/optimism parameter affecting the amount of
            exploration. Default is 1.
        """  # noqa
        super().__init__()

        self.alpha = alpha
        # per-arm design matrices and reward-weighted context sums; filled in
        # by `_initialize_params` once the bandit is known
        self.A, self.b = [], []
        self.is_initialized = False

    @property
    def parameters(self):
        """A dictionary containing the current policy parameters"""
        return {"ev_estimates": self.ev_estimates, "A": self.A, "b": self.b}

    @property
    def hyperparameters(self):
        """A dictionary containing the policy hyperparameters"""
        return {
            "id": "LinUCB",
            "alpha": self.alpha,
        }

    def _initialize_params(self, bandit):
        """
        Initialize any policy-specific parameters that depend on information
        from the bandit environment.

        For each of the `bandit.n_arms` arms this creates a `bandit.D` x
        `bandit.D` identity matrix in `self.A` and a length-`bandit.D` zero
        vector in `self.b`.

        Raises
        ------
        AssertionError
            If the bandit's hyperparameter id is not "ContextualLinearBandit".
        """
        bhp = bandit.hyperparameters
        fstr = "LinUCB only defined for contextual linear bandits, got: {}"
        assert bhp["id"] == "ContextualLinearBandit", fstr.format(bhp["id"])

        self.A, self.b = [], []
        for _ in range(bandit.n_arms):
            self.A.append(np.eye(bandit.D))
            self.b.append(np.zeros(bandit.D))

        self.is_initialized = True

    def _select_arm(self, bandit, context):
        """
        Return the index of the arm with the largest upper confidence bound.

        For each arm `a` with context column `x = context[:, a]`, the score is
        ``theta_hat @ x + alpha * sqrt(x @ inv(A_a) @ x)`` where
        ``theta_hat = inv(A_a) @ b_a``.
        """
        probs = []
        for a in range(bandit.n_arms):
            C, A, b = context[:, a], self.A[a], self.b[a]
            A_inv = np.linalg.inv(A)
            theta_hat = A_inv @ b
            p = theta_hat @ C + self.alpha * np.sqrt(C.T @ A_inv @ C)

            probs.append(p)
        return np.argmax(probs)

    def _update_params(self, arm_id, rwd, context):
        """
        Update `A` and `b` for the pulled arm with its context column and the
        observed reward: ``A += x x^T`` and ``b += rwd * x``.
        """
        x = context[:, arm_id]
        # For a 1-D column, `x @ x.T` is the scalar inner product, which would
        # be broadcast-added to every entry of A. The update needs the rank-1
        # outer product instead.
        self.A[arm_id] += np.outer(x, x)
        self.b[arm_id] += rwd * x

    def _reset_params(self):
        """
        Reset any model-specific parameters. This gets called within the
        public `self.reset()` method.
        """
        self.A, self.b = [], []
        self.ev_estimates = {}


================================================
FILE: numpy_ml/bandits/trainer.py
================================================
"""A trainer/runner object for executing and comparing MAB policies."""

import warnings
import os.path as op
from collections import defaultdict

import numpy as np

from numpy_ml.utils.testing import DependencyWarning

try:
    import matplotlib.pyplot as plt

    _PLOTTING = True
except ImportError:
    fstr = "Cannot import matplotlib. Plotting functionality disabled."
    warnings.warn(fstr, DependencyWarning)
    _PLOTTING = False


def get_scriptdir():
    """Return the directory that holds this module's (symlink-resolved) file"""
    script_path = op.realpath(__file__)
    return op.dirname(script_path)


def mse(bandit, policy):
    """
    Mean squared error between a policy's estimates of the expected arm
    payouts (`policy.ev_estimates`, taken in sorted key order) and the
    bandit's true expected payouts (`bandit.arm_evs`).

    Returns NaN when the policy has no `ev_estimates` attribute or the
    estimates are empty.
    """
    has_estimates = hasattr(policy, "ev_estimates") and len(policy.ev_estimates) > 0
    if not has_estimates:
        return np.nan

    true_evs = bandit.arm_evs
    estimates = policy.ev_estimates

    # pair the estimates, ordered by arm key, with the true values positionally
    squared_errors = [
        (estimates[arm] - ev) ** 2 for arm, ev in zip(sorted(estimates), true_evs)
    ]
    return np.mean(squared_errors)


def smooth(prev, cur, weight):
    r"""
    Blend the previous smoothed value with the current raw value using a
    simple weighted average.

    Notes
    -----
    The smoothed value at timestep `t`, :math:`\tilde{X}_t`, is calculated as

    .. math::

        \tilde{X}_t = \epsilon \tilde{X}_{t-1} + (1 - \epsilon) X_t

    where :math:`X_t` is the value at timestep `t`, :math:`\tilde{X}_{t-1}` is
    the value of the smoothed signal at timestep `t-1`, and :math:`\epsilon` is
    the smoothing weight.

    Parameters
    ----------
    prev : float or :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The value of the smoothed signal at the immediately preceding
        timestep.
    cur : float or :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The value of the signal at the current timestep
    weight : float or :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The smoothing weight. Values closer to 0 result in less smoothing,
        values closer to 1 produce more aggressive smoothing. If weight is an
        array, each dimension will be interpreted as a separate smoothing
        weight for the corresponding dimension in `cur`.

    Returns
    -------
    smoothed : float or :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The smoothed signal
    """
    carried_over = weight * prev
    innovation = (1 - weight) * cur
    return carried_over + innovation


class BanditTrainer:
    def __init__(self):
        """
        An object to facilitate multi-armed bandit training, comparison, and
        evaluation.
        """
        self.logs = {}

    def compare(
        self,
        policies,
        bandit,
        n_trials,
        n_duplicates,
        plot=True,
        seed=None,
        smooth_weight=0.999,
        out_dir=None,
    ):
        """
        Compare the performance of multiple policies on the same bandit
        environment, generating a plot for each.

        Parameters
        ----------
        policies : list of :class:`BanditPolicyBase <numpy_ml.bandits.policies.BanditPolicyBase>` instances
            The multi-armed bandit policies to compare.
        bandit : :class:`Bandit <numpy_ml.bandits.bandits.Bandit>` instance
            The environment to train the policies on.
        n_trials : int
            The number of trials per run.
        n_duplicates: int
            The number of times to evaluate each policy on the bandit
            environment. Larger values permit a better estimate of the
            variance in payoff / cumulative regret for each policy.
        plot : bool
            Whether to generate a plot of the policy's average reward and
            regret across the episodes. Default is True.
        seed : int
            The seed for the random number generator. The generator is
            re-seeded before each policy is run. If None, no seeding is
            performed. Default is None.
        smooth_weight : float in [0, 1]
            The smoothing weight. Values closer to 0 result in less smoothing,
            values closer to 1 produce more aggressive smoothing. Default is
            0.999.
        out_dir : str or None
            Plots will be saved to this directory if `plot` is True. If
            `out_dir` is None, plots will not be saved. Default is None.
        """  # noqa: E501
        self.init_logs(policies)

        # plotting needs both the caller's request and an importable matplotlib
        do_plot = plot and _PLOTTING

        all_axes = [None] * len(policies)
        if do_plot:
            # squeeze=False keeps a 2-D (n_policies, 2) grid even for a single
            # policy, so every row can be unpacked into two axes below
            fig, all_axes = plt.subplots(
                len(policies), 2, sharex=True, squeeze=False
            )
            fig.set_size_inches(10.5, len(policies) * 5.25)

        for policy, axes in zip(policies, all_axes):
            # `is not None` so that a seed of 0 is honored
            if seed is not None:
                np.random.seed(seed)

            bandit.reset()
            policy.reset()

            self.train(
                policy,
                bandit,
                n_trials,
                n_duplicates,
                axes=axes,
                plot=plot,
                verbose=False,
                out_dir=out_dir,
                smooth_weight=smooth_weight,
            )

        # without axes there is nothing to rescale, save, or show
        if do_plot:
            # enforce the same y-ranges across plots for straightforward
            # comparison
            a1_r, a2_r = zip(
                *[(a1.get_ylim(), a2.get_ylim()) for (a1, a2) in all_axes]
            )

            a1_min = min(a1_r, key=lambda x: x[0])[0]
            a1_max = max(a1_r, key=lambda x: x[1])[1]
            a2_min = min(a2_r, key=lambda x: x[0])[0]
            a2_max = max(a2_r, key=lambda x: x[1])[1]

            for (a1, a2) in all_axes:
                a1.set_ylim(a1_min, a1_max)
                a2.set_ylim(a2_min, a2_max)

            if out_dir is not None:
                plt.savefig(op.join(out_dir, "bandit_comparison.png"), dpi=300)
            plt.show()

    def train(
        self,
        policy,
        bandit,
        n_trials,
        n_duplicates,
        plot=True,
        axes=None,
        verbose=True,
        print_every=100,
        smooth_weight=0.999,
        out_dir=None,
    ):
        """
        Run a MAB policy on a multi-armed bandit problem for several
        independent runs, recording per-trial statistics in `self.logs`.

        Parameters
        ----------
        policy : :class:`BanditPolicyBase <numpy_ml.bandits.policies.BanditPolicyBase>` instance
            The multi-armed bandit policy to train.
        bandit : :class:`Bandit <numpy_ml.bandits.bandits.Bandit>` instance
            The environment to run the policy on.
        n_trials : int
            The number of trials per run.
        n_duplicates: int
            The number of runs to evaluate. The bandit and the policy are both
            reset at the start of every run.
        plot : bool
            Whether to generate a plot of the policy's average reward and
            regret across the episodes. Default is True.
        axes : list of :py:class:`Axis <matplotlib.axes.Axis>` instances or None
            If not None and ``plot = True``, these are the axes that will be
            used to plot the cumulative reward and regret, respectively.
            Default is None.
        verbose : boolean
            Whether to print run statistics during training. Default is True.
        print_every : int
            The number of trials to run before printing the regret to stdout.
            This is ignored if ``verbose`` is false. Default is 100.
        smooth_weight : float in [0, 1]
            The smoothing weight. Values closer to 0 result in less smoothing,
            values closer to 1 produce more aggressive smoothing. Default is
            0.999.
        out_dir : str or None
            Plots will be saved to this directory if `plot` is True. If
            `out_dir` is None, plots will not be saved. Default is None.

        Returns
        -------
        policy : :class:`BanditPolicyBase <numpy_ml.bandits.policies.BanditPolicyBase>` instance
            The policy trained during the last (i.e. most recent) duplicate
            run.
        """  # noqa: E501
        name = str(policy)
        if name not in self.logs:
            self.init_logs(policy)

        logs = self.logs

        for dup in range(n_duplicates):
            if verbose:
                print("\nDUPLICATE {}/{}\n".format(dup + 1, n_duplicates))

            bandit.reset()
            policy.reset()

            avg_oracle_reward = 0
            cregret = 0
            for trial_id in range(n_trials):
                step = trial_id + 1  # logs are keyed by 1-based trial number
                rwd, arm, orwd, oarm = self._train_step(bandit, policy)

                loss = mse(bandit, policy)
                regret = orwd - rwd

                avg_oracle_reward += orwd
                cregret += regret

                record = {
                    "mse": loss,
                    "reward": rwd,
                    "regret": regret,
                    "cregret": cregret,
                    "optimal_arm": oarm,
                    "selected_arm": arm,
                    "optimal_reward": orwd,
                }
                policy_log = logs[name]
                for metric, value in record.items():
                    policy_log[metric][step].append(value)

                if step % print_every == 0 and verbose:
                    fstr = "Trial {}/{}, {}/{}, Regret: {:.4f}"
                    print(fstr.format(step, n_trials, dup + 1, n_duplicates, regret))

            avg_oracle_reward /= n_trials

            if verbose:
                self._print_run_summary(bandit, policy, regret)

        # only the final run's average oracle reward is handed to the plot
        if plot and _PLOTTING:
            self._plot_reward(avg_oracle_reward, policy, smooth_weight, axes, out_dir)

        return policy

    def _train_step(self, bandit, policy):
        P, B = policy, bandit
        C = B.get_context() if hasattr(B, "get_context") else None
        rwd, arm = P.act(B, C)
        oracle_rwd, oracle_arm = B.oracle_payoff(C)

Download .txt

gitextract_t47luwfk/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── a--bug-performance-issue.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .readthedocs.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── conf.py
│   ├── index.rst
│   ├── make.bat
│   ├── numpy_ml.bandits.bandits.rst
│   ├── numpy_ml.bandits.policies.rst
│   ├── numpy_ml.bandits.rst
│   ├── numpy_ml.bandits.trainer.rst
│   ├── numpy_ml.factorization.factors.rst
│   ├── numpy_ml.factorization.rst
│   ├── numpy_ml.gmm.gmm.rst
│   ├── numpy_ml.gmm.rst
│   ├── numpy_ml.hmm.MultinomialHMM.rst
│   ├── numpy_ml.hmm.rst
│   ├── numpy_ml.lda.lda.rst
│   ├── numpy_ml.lda.rst
│   ├── numpy_ml.lda.smoothed_lda.rst
│   ├── numpy_ml.linear_models.lm.rst
│   ├── numpy_ml.linear_models.rst
│   ├── numpy_ml.neural_nets.activations.rst
│   ├── numpy_ml.neural_nets.initializers.rst
│   ├── numpy_ml.neural_nets.layers.rst
│   ├── numpy_ml.neural_nets.losses.rst
│   ├── numpy_ml.neural_nets.models.rst
│   ├── numpy_ml.neural_nets.modules.rst
│   ├── numpy_ml.neural_nets.optimizers.rst
│   ├── numpy_ml.neural_nets.rst
│   ├── numpy_ml.neural_nets.schedulers.rst
│   ├── numpy_ml.neural_nets.utils.rst
│   ├── numpy_ml.neural_nets.wrappers.rst
│   ├── numpy_ml.ngram.additive.rst
│   ├── numpy_ml.ngram.goodturing.rst
│   ├── numpy_ml.ngram.mle.rst
│   ├── numpy_ml.ngram.rst
│   ├── numpy_ml.nonparametric.gp.rst
│   ├── numpy_ml.nonparametric.kernel_regression.rst
│   ├── numpy_ml.nonparametric.knn.rst
│   ├── numpy_ml.nonparametric.rst
│   ├── numpy_ml.preprocessing.dsp.rst
│   ├── numpy_ml.preprocessing.general.rst
│   ├── numpy_ml.preprocessing.nlp.rst
│   ├── numpy_ml.preprocessing.rst
│   ├── numpy_ml.rl_models.agents.rst
│   ├── numpy_ml.rl_models.rl_utils.rst
│   ├── numpy_ml.rl_models.rst
│   ├── numpy_ml.rl_models.trainer.rst
│   ├── numpy_ml.trees.dt.rst
│   ├── numpy_ml.trees.gbdt.rst
│   ├── numpy_ml.trees.losses.rst
│   ├── numpy_ml.trees.rf.rst
│   ├── numpy_ml.trees.rst
│   ├── numpy_ml.utils.data_structures.rst
│   ├── numpy_ml.utils.distance_metrics.rst
│   ├── numpy_ml.utils.graphs.rst
│   ├── numpy_ml.utils.kernels.rst
│   ├── numpy_ml.utils.rst
│   ├── numpy_ml.utils.testing.rst
│   ├── numpy_ml.utils.windows.rst
│   └── requirements.txt
├── numpy_ml/
│   ├── README.md
│   ├── __init__.py
│   ├── bandits/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── bandits.py
│   │   ├── policies.py
│   │   └── trainer.py
│   ├── factorization/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── factors.py
│   ├── gmm/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── gmm.py
│   ├── hmm/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── hmm.py
│   ├── lda/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── lda.py
│   │   └── lda_smoothed.py
│   ├── linear_models/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── bayesian_regression.py
│   │   ├── glm.py
│   │   ├── linear_regression.py
│   │   ├── logistic.py
│   │   ├── naive_bayes.py
│   │   └── ridge.py
│   ├── neural_nets/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── activations/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── activations.py
│   │   ├── initializers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── initializers.py
│   │   ├── layers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── layers.py
│   │   ├── losses/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── losses.py
│   │   ├── models/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── vae.py
│   │   │   ├── w2v.py
│   │   │   └── wgan_gp.py
│   │   ├── modules/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── modules.py
│   │   ├── optimizers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── optimizers.py
│   │   ├── schedulers/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── schedulers.py
│   │   ├── utils/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   └── wrappers/
│   │       ├── README.md
│   │       ├── __init__.py
│   │       └── wrappers.py
│   ├── ngram/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── ngram.py
│   ├── nonparametric/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gp.py
│   │   ├── kernel_regression.py
│   │   └── knn.py
│   ├── plots/
│   │   ├── bandit_plots.py
│   │   ├── gmm_plots.py
│   │   ├── hmm_plots.py
│   │   ├── lda_plots.py
│   │   ├── lm_plots.py
│   │   ├── ngram_plots.py
│   │   ├── nn_activations_plots.py
│   │   ├── nn_schedulers_plots.py
│   │   ├── nonparametric_plots.py
│   │   ├── rl_plots.py
│   │   └── trees_plots.py
│   ├── preprocessing/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── dsp.py
│   │   ├── general.py
│   │   └── nlp.py
│   ├── rl_models/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── agents.py
│   │   ├── rl_utils.py
│   │   ├── tiles/
│   │   │   ├── __init__.py
│   │   │   └── tiles3.py
│   │   └── trainer.py
│   ├── tests/
│   │   ├── __init__.py
│   │   ├── nn_torch_models.py
│   │   ├── test_glm.py
│   │   ├── test_linear_regression.py
│   │   ├── test_naive_bayes.py
│   │   ├── test_ngram.py
│   │   ├── test_nn.py
│   │   ├── test_nn_activations.py
│   │   ├── test_nonparametric.py
│   │   ├── test_preprocessing.py
│   │   ├── test_trees.py
│   │   └── test_utils.py
│   ├── trees/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── dt.py
│   │   ├── gbdt.py
│   │   ├── losses.py
│   │   └── rf.py
│   └── utils/
│       ├── README.md
│       ├── __init__.py
│       ├── data_structures.py
│       ├── distance_metrics.py
│       ├── graphs.py
│       ├── kernels.py
│       ├── misc.py
│       ├── testing.py
│       └── windows.py
├── requirements-dev.txt
├── requirements-test.txt
├── requirements.txt
├── setup.py
└── tox.ini

Download .txt

SYMBOL INDEX (1357 symbols across 71 files)

FILE: docs/conf.py
  function linkcode_resolve (line 63) | def linkcode_resolve(domain, info):

FILE: numpy_ml/bandits/bandits.py
  class Bandit (line 10) | class Bandit(ABC):
    method __init__ (line 11) | def __init__(self, rewards, reward_probs, context=None):
    method __repr__ (line 18) | def __repr__(self):
    method hyperparameters (line 25) | def hyperparameters(self):
    method oracle_payoff (line 30) | def oracle_payoff(self, context=None):
    method pull (line 47) | def pull(self, arm_id, context=None):
    method reset (line 69) | def reset(self):
    method _pull (line 74) | def _pull(self, arm_id):
  class MultinomialBandit (line 78) | class MultinomialBandit(Bandit):
    method __init__ (line 79) | def __init__(self, payoffs, payoff_probs):
    method hyperparameters (line 110) | def hyperparameters(self):
    method oracle_payoff (line 118) | def oracle_payoff(self, context=None):
    method _pull (line 136) | def _pull(self, arm_id, context):
  class BernoulliBandit (line 142) | class BernoulliBandit(Bandit):
    method __init__ (line 143) | def __init__(self, payoff_probs):
    method hyperparameters (line 168) | def hyperparameters(self):
    method oracle_payoff (line 175) | def oracle_payoff(self, context=None):
    method _pull (line 193) | def _pull(self, arm_id, context):
  class GaussianBandit (line 197) | class GaussianBandit(Bandit):
    method __init__ (line 198) | def __init__(self, payoff_dists, payoff_probs):
    method hyperparameters (line 229) | def hyperparameters(self):
    method _pull (line 237) | def _pull(self, arm_id, context):
    method oracle_payoff (line 246) | def oracle_payoff(self, context=None):
  class ShortestPathBandit (line 265) | class ShortestPathBandit(Bandit):
    method __init__ (line 266) | def __init__(self, G, start_vertex, end_vertex):
    method hyperparameters (line 300) | def hyperparameters(self):
    method oracle_payoff (line 309) | def oracle_payoff(self, context=None):
    method _calc_arm_evs (line 327) | def _calc_arm_evs(self):
    method _pull (line 336) | def _pull(self, arm_id, context):
  class ContextualBernoulliBandit (line 346) | class ContextualBernoulliBandit(Bandit):
    method __init__ (line 347) | def __init__(self, context_probs):
    method hyperparameters (line 372) | def hyperparameters(self):
    method get_context (line 379) | def get_context(self):
    method oracle_payoff (line 395) | def oracle_payoff(self, context):
    method _pull (line 414) | def _pull(self, arm_id, context):
  class ContextualLinearBandit (line 421) | class ContextualLinearBandit(Bandit):
    method __init__ (line 422) | def __init__(self, K, D, payoff_variance=1):
    method hyperparameters (line 476) | def hyperparameters(self):
    method parameters (line 486) | def parameters(self):
    method get_context (line 490) | def get_context(self):
    method oracle_payoff (line 503) | def oracle_payoff(self, context):
    method _pull (line 523) | def _pull(self, arm_id, context):

FILE: numpy_ml/bandits/policies.py
  class BanditPolicyBase (line 11) | class BanditPolicyBase(ABC):
    method __init__ (line 12) | def __init__(self):
    method __repr__ (line 19) | def __repr__(self):
    method hyperparameters (line 26) | def hyperparameters(self):
    method parameters (line 31) | def parameters(self):
    method act (line 35) | def act(self, bandit, context=None):
    method reset (line 63) | def reset(self):
    method _pull_arm (line 69) | def _pull_arm(self, bandit, arm_id, context):
    method _select_arm (line 75) | def _select_arm(self, bandit, context):
    method _update_params (line 80) | def _update_params(self, bandit, context):
    method _initialize_params (line 85) | def _initialize_params(self, bandit):
    method _reset_params (line 93) | def _reset_params(self):
  class EpsilonGreedy (line 101) | class EpsilonGreedy(BanditPolicyBase):
    method __init__ (line 102) | def __init__(self, epsilon=0.05, ev_prior=0.5):
    method parameters (line 144) | def parameters(self):
    method hyperparameters (line 149) | def hyperparameters(self):
    method _initialize_params (line 157) | def _initialize_params(self, bandit):
    method _select_arm (line 165) | def _select_arm(self, bandit, context=None):
    method _update_params (line 173) | def _update_params(self, arm_id, reward, context=None):
    method _reset_params (line 178) | def _reset_params(self):
  class UCB1 (line 187) | class UCB1(BanditPolicyBase):
    method __init__ (line 188) | def __init__(self, C=1, ev_prior=0.5):
    method parameters (line 233) | def parameters(self):
    method hyperparameters (line 238) | def hyperparameters(self):
    method _initialize_params (line 246) | def _initialize_params(self, bandit):
    method _select_arm (line 254) | def _select_arm(self, bandit, context=None):
    method _update_params (line 262) | def _update_params(self, arm_id, reward, context=None):
    method _reset_params (line 267) | def _reset_params(self):
  class ThompsonSamplingBetaBinomial (line 276) | class ThompsonSamplingBetaBinomial(BanditPolicyBase):
    method __init__ (line 277) | def __init__(self, alpha=1, beta=1):
    method parameters (line 335) | def parameters(self):
    method hyperparameters (line 344) | def hyperparameters(self):
    method _initialize_params (line 352) | def _initialize_params(self, bandit):
    method _select_arm (line 367) | def _select_arm(self, bandit, context):
    method _update_params (line 377) | def _update_params(self, arm_id, rwd, context):
    method _map_estimate (line 386) | def _map_estimate(self, arm_id, rwd):
    method _reset_params (line 401) | def _reset_params(self):
  class LinUCB (line 410) | class LinUCB(BanditPolicyBase):
    method __init__ (line 411) | def __init__(self, alpha=1):
    method parameters (line 439) | def parameters(self):
    method hyperparameters (line 444) | def hyperparameters(self):
    method _initialize_params (line 451) | def _initialize_params(self, bandit):
    method _select_arm (line 467) | def _select_arm(self, bandit, context):
    method _update_params (line 478) | def _update_params(self, arm_id, rwd, context):
    method _reset_params (line 483) | def _reset_params(self):

FILE: numpy_ml/bandits/trainer.py
  function get_scriptdir (line 21) | def get_scriptdir():
  function mse (line 26) | def mse(bandit, policy):
  function smooth (line 42) | def smooth(prev, cur, weight):
  class BanditTrainer (line 79) | class BanditTrainer:
    method __init__ (line 80) | def __init__(self):
    method compare (line 87) | def compare(
    method train (line 170) | def train(
    method _train_step (line 268) | def _train_step(self, bandit, policy):
    method init_logs (line 275) | def init_logs(self, policies):
    method _print_run_summary (line 309) | def _print_run_summary(self, bandit, policy, regret):
    method _plot_reward (line 323) | def _plot_reward(self, optimal_rwd, policy, smooth_weight, axes=None, ...
    method _smoothed_metrics (line 367) | def _smoothed_metrics(self, policy, optimal_rwd, smooth_weight):

FILE: numpy_ml/factorization/factors.py
  class VanillaALS (line 8) | class VanillaALS:
    method __init__ (line 9) | def __init__(self, K, alpha=1, max_iter=200, tol=1e-4):
    method parameters (line 64) | def parameters(self):
    method hyperparameters (line 69) | def hyperparameters(self):
    method _init_factor_matrices (line 79) | def _init_factor_matrices(self, X, W=None, H=None):
    method _loss (line 89) | def _loss(self, X, Xhat):
    method _update_factor (line 95) | def _update_factor(self, X, A):
    method fit (line 100) | def fit(self, X, W=None, H=None, n_initializations=10, verbose=False):
    method _fit (line 141) | def _fit(self, X, W, H, verbose):
  class NMF (line 160) | class NMF:
    method __init__ (line 161) | def __init__(self, K, max_iter=200, tol=1e-4):
    method parameters (line 213) | def parameters(self):
    method hyperparameters (line 218) | def hyperparameters(self):
    method _init_factor_matrices (line 227) | def _init_factor_matrices(self, X, W, H):
    method _loss (line 247) | def _loss(self, X, Xhat):
    method _update_H (line 251) | def _update_H(self, X, W, H):
    method _update_W (line 262) | def _update_W(self, X, W, H):
    method fit (line 277) | def fit(self, X, W=None, H=None, n_initializations=10, verbose=False):
    method _fit (line 354) | def _fit(self, X, W, H, verbose):

FILE: numpy_ml/gmm/gmm.py
  class GMM (line 7) | class GMM(object):
    method __init__ (line 8) | def __init__(self, C=3, seed=None):
    method _initialize_params (line 48) | def _initialize_params(self, X):
    method likelihood_lower_bound (line 65) | def likelihood_lower_bound(self, X):
    method fit (line 93) | def fit(self, X, max_iter=100, tol=1e-3, verbose=False):
    method predict (line 145) | def predict(self, X, soft_labels=True):
    method _E_step (line 185) | def _E_step(self, X):
    method _M_step (line 210) | def _M_step(self, X):

FILE: numpy_ml/hmm/hmm.py
  class MultinomialHMM (line 7) | class MultinomialHMM:
    method __init__ (line 8) | def __init__(self, A=None, B=None, pi=None, eps=None):
    method generate (line 81) | def generate(self, n_steps, latent_state_types, obs_types):
    method log_likelihood (line 122) | def log_likelihood(self, O):
    method decode (line 180) | def decode(self, O):
    method _forward (line 291) | def _forward(self, Obs):
    method _backward (line 362) | def _backward(self, Obs):
    method _initialize_parameters (line 429) | def _initialize_parameters(self):
    method fit (line 451) | def fit(
    method _E_step (line 537) | def _E_step(self, O):
    method _M_step (line 622) | def _M_step(self, O, gamma, xi, phi):

FILE: numpy_ml/lda/lda.py
  class LDA (line 5) | class LDA(object):
    method __init__ (line 6) | def __init__(self, T=10):
    method _maximize_phi (line 36) | def _maximize_phi(self):
    method _maximize_gamma (line 60) | def _maximize_gamma(self):
    method _maximize_beta (line 74) | def _maximize_beta(self):
    method _maximize_alpha (line 99) | def _maximize_alpha(self, max_iters=1000, tol=0.1):
    method _E_step (line 137) | def _E_step(self):
    method _M_step (line 144) | def _M_step(self):
    method VLB (line 151) | def VLB(self):
    method initialize_parameters (line 189) | def initialize_parameters(self):
    method train (line 206) | def train(self, corpus, verbose=False, max_iter=1000, tol=5):
  function dg (line 255) | def dg(gamma, d, t):

FILE: numpy_ml/lda/lda_smoothed.py
  class SmoothedLDA (line 4) | class SmoothedLDA(object):
    method __init__ (line 5) | def __init__(self, T, **kwargs):
    method _init_params (line 42) | def _init_params(self, texts, tokens):
    method train (line 59) | def train(self, texts, tokens, n_gibbs=2000):
    method what_did_you_learn (line 90) | def what_did_you_learn(self, top_n=10):
    method fit_params (line 101) | def fit_params(self, C_wt, C_dt):
    method _estimate_topic_prob (line 137) | def _estimate_topic_prob(self, ii, d, C_wt, C_dt):
    method _gibbs_sampler (line 153) | def _gibbs_sampler(self, n_gibbs, texts):

FILE: numpy_ml/linear_models/bayesian_regression.py
  class BayesianLinearRegressionUnknownVariance (line 8) | class BayesianLinearRegressionUnknownVariance:
    method __init__ (line 9) | def __init__(self, alpha=1, beta=2, mu=0, V=None, fit_intercept=True):
    method fit (line 81) | def fit(self, X, y):
    method predict (line 139) | def predict(self, X):
  class BayesianLinearRegressionKnownVariance (line 167) | class BayesianLinearRegressionKnownVariance:
    method __init__ (line 168) | def __init__(self, mu=0, sigma=1, V=None, fit_intercept=True):
    method fit (line 237) | def fit(self, X, y):
    method predict (line 276) | def predict(self, X):

FILE: numpy_ml/linear_models/glm.py
  class GeneralizedLinearModel (line 48) | class GeneralizedLinearModel:
    method __init__ (line 49) | def __init__(self, link, fit_intercept=True, tol=1e-5, max_iter=100):
    method fit (line 129) | def fit(self, X, y):
    method predict (line 187) | def predict(self, X):

FILE: numpy_ml/linear_models/linear_regression.py
  class LinearRegression (line 6) | class LinearRegression:
    method __init__ (line 7) | def __init__(self, fit_intercept=True):
    method update (line 62) | def update(self, X, y, weights=None):
    method _update1D (line 142) | def _update1D(self, x, y, w):
    method _update2D (line 156) | def _update2D(self, X, y, W):
    method fit (line 172) | def fit(self, X, y, weights=None):
    method predict (line 218) | def predict(self, X):

FILE: numpy_ml/linear_models/logistic.py
  class LogisticRegression (line 5) | class LogisticRegression:
    method __init__ (line 6) | def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
    method fit (line 90) | def fit(self, X, y, lr=0.01, tol=1e-7, max_iter=1e7):
    method _NLL (line 121) | def _NLL(self, X, y, y_pred):
    method _NLL_grad (line 143) | def _NLL_grad(self, X, y, y_pred):
    method predict (line 150) | def predict(self, X):
  function _sigmoid (line 171) | def _sigmoid(x):

FILE: numpy_ml/linear_models/naive_bayes.py
  class GaussianNBClassifier (line 5) | class GaussianNBClassifier:
    method __init__ (line 6) | def __init__(self, eps=1e-6):
    method fit (line 79) | def fit(self, X, y):
    method predict (line 128) | def predict(self, X):
    method _log_posterior (line 145) | def _log_posterior(self, X):
    method _log_class_posterior (line 166) | def _log_class_posterior(self, X, class_idx):

FILE: numpy_ml/linear_models/ridge.py
  class RidgeRegression (line 6) | class RidgeRegression:
    method __init__ (line 7) | def __init__(self, alpha=1, fit_intercept=True):
    method fit (line 61) | def fit(self, X, y):
    method predict (line 86) | def predict(self, X):

FILE: numpy_ml/neural_nets/activations/activations.py
  class ActivationBase (line 8) | class ActivationBase(ABC):
    method __init__ (line 9) | def __init__(self, **kwargs):
    method __call__ (line 13) | def __call__(self, z):
    method fn (line 20) | def fn(self, z):
    method grad (line 25) | def grad(self, x, **kwargs):
  class Sigmoid (line 30) | class Sigmoid(ActivationBase):
    method __init__ (line 31) | def __init__(self):
    method __str__ (line 35) | def __str__(self):
    method fn (line 39) | def fn(self, z):
    method grad (line 49) | def grad(self, x):
    method grad2 (line 60) | def grad2(self, x):
  class ReLU (line 73) | class ReLU(ActivationBase):
    method __init__ (line 97) | def __init__(self):
    method __str__ (line 100) | def __str__(self):
    method fn (line 104) | def fn(self, z):
    method grad (line 116) | def grad(self, x):
    method grad2 (line 128) | def grad2(self, x):
  class LeakyReLU (line 140) | class LeakyReLU(ActivationBase):
    method __init__ (line 161) | def __init__(self, alpha=0.3):
    method __str__ (line 165) | def __str__(self):
    method fn (line 169) | def fn(self, z):
    method grad (line 183) | def grad(self, x):
    method grad2 (line 198) | def grad2(self, x):
  class GELU (line 210) | class GELU(ActivationBase):
    method __init__ (line 211) | def __init__(self, approximate=True):
    method __str__ (line 235) | def __str__(self):
    method fn (line 239) | def fn(self, z):
    method grad (line 254) | def grad(self, x):
    method grad2 (line 279) | def grad2(self, x):
  class Tanh (line 304) | class Tanh(ActivationBase):
    method __init__ (line 305) | def __init__(self):
    method __str__ (line 309) | def __str__(self):
    method fn (line 313) | def fn(self, z):
    method grad (line 317) | def grad(self, x):
    method grad2 (line 328) | def grad2(self, x):
  class Affine (line 342) | class Affine(ActivationBase):
    method __init__ (line 343) | def __init__(self, slope=1, intercept=0):
    method __str__ (line 358) | def __str__(self):
    method fn (line 362) | def fn(self, z):
    method grad (line 372) | def grad(self, x):
    method grad2 (line 383) | def grad2(self, x):
  class Identity (line 395) | class Identity(Affine):
    method __init__ (line 396) | def __init__(self):
    method __str__ (line 407) | def __str__(self):
  class ELU (line 412) | class ELU(ActivationBase):
    method __init__ (line 413) | def __init__(self, alpha=1.0):
    method __str__ (line 445) | def __str__(self):
    method fn (line 449) | def fn(self, z):
    method grad (line 462) | def grad(self, x):
    method grad2 (line 476) | def grad2(self, x):
  class Exponential (line 491) | class Exponential(ActivationBase):
    method __init__ (line 492) | def __init__(self):
    method __str__ (line 496) | def __str__(self):
    method fn (line 500) | def fn(self, z):
    method grad (line 509) | def grad(self, x):
    method grad2 (line 520) | def grad2(self, x):
  class SELU (line 532) | class SELU(ActivationBase):
    method __init__ (line 558) | def __init__(self):
    method __str__ (line 564) | def __str__(self):
    method fn (line 568) | def fn(self, z):
    method grad (line 586) | def grad(self, x):
    method grad2 (line 601) | def grad2(self, x):
  class HardSigmoid (line 615) | class HardSigmoid(ActivationBase):
    method __init__ (line 616) | def __init__(self):
    method __str__ (line 627) | def __str__(self):
    method fn (line 631) | def fn(self, z):
    method grad (line 644) | def grad(self, x):
    method grad2 (line 657) | def grad2(self, x):
  class SoftPlus (line 669) | class SoftPlus(ActivationBase):
    method __init__ (line 670) | def __init__(self):
    method __str__ (line 684) | def __str__(self):
    method fn (line 688) | def fn(self, z):
    method grad (line 698) | def grad(self, x):
    method grad2 (line 710) | def grad2(self, x):

FILE: numpy_ml/neural_nets/initializers/initializers.py
  class ActivationInitializer (line 41) | class ActivationInitializer(object):
    method __init__ (line 42) | def __init__(self, param=None):
    method __call__ (line 53) | def __call__(self):
    method init_from_str (line 66) | def init_from_str(self, act_str):
  class SchedulerInitializer (line 106) | class SchedulerInitializer(object):
    method __init__ (line 107) | def __init__(self, param=None, lr=None):
    method __call__ (line 125) | def __call__(self):
    method init_from_str (line 138) | def init_from_str(self):
    method init_from_dict (line 156) | def init_from_dict(self):
  class OptimizerInitializer (line 176) | class OptimizerInitializer(object):
    method __init__ (line 177) | def __init__(self, param=None):
    method __call__ (line 189) | def __call__(self):
    method init_from_str (line 202) | def init_from_str(self):
    method init_from_dict (line 219) | def init_from_dict(self):
  class WeightInitializer (line 242) | class WeightInitializer(object):
    method __init__ (line 243) | def __init__(self, act_fn_str, mode="glorot_uniform"):
    method __call__ (line 282) | def __call__(self, weight_shape):
    method _calc_glorot_gain (line 293) | def _calc_glorot_gain(self):

FILE: numpy_ml/neural_nets/layers/layers.py
  class LayerBase (line 27) | class LayerBase(ABC):
    method __init__ (line 28) | def __init__(self, optimizer=None):
    method _init_params (line 42) | def _init_params(self, **kwargs):
    method forward (line 46) | def forward(self, z, **kwargs):
    method backward (line 51) | def backward(self, out, **kwargs):
    method freeze (line 55) | def freeze(self):
    method unfreeze (line 62) | def unfreeze(self):
    method flush_gradients (line 66) | def flush_gradients(self):
    method update (line 76) | def update(self, cur_loss=None):
    method set_params (line 88) | def set_params(self, summary_dict):
    method summary (line 130) | def summary(self):
  class DotProductAttention (line 139) | class DotProductAttention(LayerBase):
    method __init__ (line 140) | def __init__(self, scale=True, dropout_p=0, init="glorot_uniform", opt...
    method _init_params (line 194) | def _init_params(self):
    method hyperparameters (line 206) | def hyperparameters(self):
    method freeze (line 219) | def freeze(self):
    method unfreeze (line 227) | def unfreeze(self):
    method forward (line 232) | def forward(self, Q, K, V, retain_derived=True):
    method _fwd (line 305) | def _fwd(self, Q, K, V):
    method backward (line 313) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 352) | def _bwd(self, dy, q, k, v, weights):
  class RBM (line 365) | class RBM(LayerBase):
    method __init__ (line 366) | def __init__(self, n_out, K=1, init="glorot_uniform", optimizer=None):
    method _init_params (line 412) | def _init_params(self):
    method hyperparameters (line 438) | def hyperparameters(self):
    method CD_update (line 452) | def CD_update(self, X):
    method forward (line 468) | def forward(self, V, K=None, retain_derived=True):
    method backward (line 563) | def backward(self, retain_grads=True, *args):
    method reconstruct (line 587) | def reconstruct(self, X, n_steps=10, return_prob=False):
  class Add (line 633) | class Add(LayerBase):
    method __init__ (line 634) | def __init__(self, act_fn=None, optimizer=None):
    method _init_params (line 669) | def _init_params(self):
    method hyperparameters (line 675) | def hyperparameters(self):
    method forward (line 686) | def forward(self, X, retain_derived=True):
    method backward (line 713) | def backward(self, dLdY, retain_grads=True):
    method _bwd (line 739) | def _bwd(self, dLdY, X, _sum):
  class Multiply (line 745) | class Multiply(LayerBase):
    method __init__ (line 746) | def __init__(self, act_fn=None, optimizer=None):
    method _init_params (line 781) | def _init_params(self):
    method hyperparameters (line 787) | def hyperparameters(self):
    method forward (line 798) | def forward(self, X, retain_derived=True):
    method backward (line 825) | def backward(self, dLdY, retain_grads=True):
    method _bwd (line 851) | def _bwd(self, dLdY, X, prod):
  class Flatten (line 859) | class Flatten(LayerBase):
    method __init__ (line 860) | def __init__(self, keep_dim="first", optimizer=None):
    method _init_params (line 895) | def _init_params(self):
    method hyperparameters (line 901) | def hyperparameters(self):
    method forward (line 912) | def forward(self, X, retain_derived=True):
    method backward (line 939) | def backward(self, dLdy, retain_grads=True):
  class BatchNorm2D (line 969) | class BatchNorm2D(LayerBase):
    method __init__ (line 970) | def __init__(self, momentum=0.9, epsilon=1e-5, optimizer=None):
    method _init_params (line 1051) | def _init_params(self):
    method hyperparameters (line 1074) | def hyperparameters(self):
    method reset_running_stats (line 1089) | def reset_running_stats(self):
    method forward (line 1095) | def forward(self, X, retain_derived=True):
    method backward (line 1158) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 1192) | def _bwd(self, dLdy, X):
  class BatchNorm1D (line 1218) | class BatchNorm1D(LayerBase):
    method __init__ (line 1219) | def __init__(self, momentum=0.9, epsilon=1e-5, optimizer=None):
    method _init_params (line 1299) | def _init_params(self):
    method hyperparameters (line 1321) | def hyperparameters(self):
    method reset_running_stats (line 1336) | def reset_running_stats(self):
    method forward (line 1342) | def forward(self, X, retain_derived=True):
    method backward (line 1390) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 1424) | def _bwd(self, dLdy, X):
  class LayerNorm2D (line 1444) | class LayerNorm2D(LayerBase):
    method __init__ (line 1445) | def __init__(self, epsilon=1e-5, optimizer=None):
    method _init_params (line 1498) | def _init_params(self, X_shape):
    method hyperparameters (line 1514) | def hyperparameters(self):
    method forward (line 1528) | def forward(self, X, retain_derived=True):
    method backward (line 1572) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 1606) | def _bwd(self, dy, X):
  class LayerNorm1D (line 1634) | class LayerNorm1D(LayerBase):
    method __init__ (line 1635) | def __init__(self, epsilon=1e-5, optimizer=None):
    method _init_params (line 1687) | def _init_params(self):
    method hyperparameters (line 1700) | def hyperparameters(self):
    method forward (line 1714) | def forward(self, X, retain_derived=True):
    method backward (line 1750) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 1784) | def _bwd(self, dLdy, X):
  class Embedding (line 1811) | class Embedding(LayerBase):
    method __init__ (line 1812) | def __init__(
    method _init_params (line 1872) | def _init_params(self):
    method hyperparameters (line 1882) | def hyperparameters(self):
    method lookup (line 1896) | def lookup(self, ids):
    method forward (line 1912) | def forward(self, X, retain_derived=True):
    method _fwd (line 1954) | def _fwd(self, X):
    method backward (line 1965) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 1993) | def _bwd(self, dLdy, X):
  class FullyConnected (line 2010) | class FullyConnected(LayerBase):
    method __init__ (line 2011) | def __init__(self, n_out, act_fn=None, init="glorot_uniform", optimize...
    method _init_params (line 2064) | def _init_params(self):
    method hyperparameters (line 2076) | def hyperparameters(self):
    method forward (line 2090) | def forward(self, X, retain_derived=True):
    method _fwd (line 2122) | def _fwd(self, X):
    method backward (line 2131) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 2165) | def _bwd(self, dLdy, X):
    method _bwd2 (line 2178) | def _bwd2(self, dLdy, X, dLdy_bwd):
  class Softmax (line 2192) | class Softmax(LayerBase):
    method __init__ (line 2193) | def __init__(self, dim=-1, optimizer=None):
    method _init_params (line 2244) | def _init_params(self):
    method hyperparameters (line 2251) | def hyperparameters(self):
    method forward (line 2263) | def forward(self, X, retain_derived=True):
    method _fwd (line 2294) | def _fwd(self, X):
    method backward (line 2300) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 2330) | def _bwd(self, dLdy, X):
  class SparseEvolution (line 2352) | class SparseEvolution(LayerBase):
    method __init__ (line 2353) | def __init__(
    method _init_params (line 2421) | def _init_params(self):
    method hyperparameters (line 2438) | def hyperparameters(self):
    method forward (line 2454) | def forward(self, X, retain_derived=True):
    method _fwd (line 2486) | def _fwd(self, X):
    method backward (line 2496) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 2530) | def _bwd(self, dLdy, X):
    method _bwd2 (line 2544) | def _bwd2(self, dLdy, X, dLdy_bwd):
    method update (line 2558) | def update(self):
    method _evolve_connections (line 2570) | def _evolve_connections(self):
  class Conv1D (line 2603) | class Conv1D(LayerBase):
    method __init__ (line 2604) | def __init__(
    method _init_params (line 2684) | def _init_params(self):
    method hyperparameters (line 2696) | def hyperparameters(self):
    method forward (line 2714) | def forward(self, X, retain_derived=True):
    method backward (line 2756) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 2799) | def _bwd(self, dLdy, X, Z):
    method _backward_naive (line 2833) | def _backward_naive(self, dLdy, retain_grads=True):
  class Conv2D (line 2895) | class Conv2D(LayerBase):
    method __init__ (line 2896) | def __init__(
    method _init_params (line 2975) | def _init_params(self):
    method hyperparameters (line 2988) | def hyperparameters(self):
    method forward (line 3006) | def forward(self, X, retain_derived=True):
    method backward (line 3048) | def backward(self, dLdy, retain_grads=True):
    method _bwd (line 3093) | def _bwd(self, dLdy, X, Z):
    method _backward_naive (line 3118) | def _backward_naive(self, dLdy, retain_grads=True):
  class Pool2D (line 3181) | class Pool2D(LayerBase):
    method __init__ (line 3182) | def __init__(self, kernel_shape, stride=1, pad=0, mode="max", optimize...
    method _init_params (line 3213) | def _init_params(self):
    method hyperparameters (line 3218) | def hyperparameters(self):
    method forward (line 3235) | def forward(self, X, retain_derived=True):
    method backward (line 3290) | def backward(self, dLdY, retain_grads=True):
  class Deconv2D (line 3353) | class Deconv2D(LayerBase):
    method __init__ (line 3354) | def __init__(
    method _init_params (line 3408) | def _init_params(self):
    method hyperparameters (line 3421) | def hyperparameters(self):
    method forward (line 3438) | def forward(self, X, retain_derived=True):
    method backward (line 3480) | def backward(self, dLdY, retain_grads=True):
    method _bwd (line 3521) | def _bwd(self, dLdY, X, Z):
  class RNNCell (line 3576) | class RNNCell(LayerBase):
    method __init__ (line 3577) | def __init__(self, n_out, act_fn="Tanh", init="glorot_uniform", optimi...
    method _init_params (line 3626) | def _init_params(self):
    method hyperparameters (line 3655) | def hyperparameters(self):
    method forward (line 3669) | def forward(self, Xt):
    method backward (line 3717) | def backward(self, dLdAt):
    method flush_gradients (line 3766) | def flush_gradients(self):
  class LSTMCell (line 3782) | class LSTMCell(LayerBase):
    method __init__ (line 3783) | def __init__(
    method _init_params (line 3857) | def _init_params(self):
    method _get_params (line 3910) | def _get_params(self):
    method hyperparameters (line 3922) | def hyperparameters(self):
    method forward (line 3937) | def forward(self, Xt):
    method backward (line 3994) | def backward(self, dLdAt):
    method flush_gradients (line 4072) | def flush_gradients(self):
  class RNN (line 4088) | class RNN(LayerBase):
    method __init__ (line 4089) | def __init__(self, n_out, act_fn="Tanh", init="glorot_uniform", optimi...
    method _init_params (line 4118) | def _init_params(self):
    method hyperparameters (line 4129) | def hyperparameters(self):
    method forward (line 4140) | def forward(self, X):
    method backward (line 4167) | def backward(self, dLdA):
    method derived_variables (line 4193) | def derived_variables(self):
    method gradients (line 4201) | def gradients(self):
    method parameters (line 4209) | def parameters(self):
    method set_params (line 4213) | def set_params(self, summary_dict):
    method freeze (line 4233) | def freeze(self):
    method unfreeze (line 4240) | def unfreeze(self):
    method flush_gradients (line 4244) | def flush_gradients(self):
    method update (line 4248) | def update(self):
  class LSTM (line 4257) | class LSTM(LayerBase):
    method __init__ (line 4258) | def __init__(
    method _init_params (line 4296) | def _init_params(self):
    method hyperparameters (line 4307) | def hyperparameters(self):
    method forward (line 4319) | def forward(self, X):
    method backward (line 4346) | def backward(self, dLdA):
    method derived_variables (line 4372) | def derived_variables(self):
    method gradients (line 4380) | def gradients(self):
    method parameters (line 4388) | def parameters(self):
    method freeze (line 4392) | def freeze(self):
    method unfreeze (line 4399) | def unfreeze(self):
    method set_params (line 4403) | def set_params(self, summary_dict):
    method flush_gradients (line 4423) | def flush_gradients(self):
    method update (line 4427) | def update(self):

FILE: numpy_ml/neural_nets/losses/losses.py
  class ObjectiveBase (line 13) | class ObjectiveBase(ABC):
    method __init__ (line 14) | def __init__(self):
    method loss (line 18) | def loss(self, y_true, y_pred):
    method grad (line 22) | def grad(self, y_true, y_pred, **kwargs):
  class SquaredError (line 26) | class SquaredError(ObjectiveBase):
    method __init__ (line 27) | def __init__(self):
    method __call__ (line 42) | def __call__(self, y, y_pred):
    method __str__ (line 45) | def __str__(self):
    method loss (line 49) | def loss(y, y_pred):
    method grad (line 68) | def grad(y, y_pred, z, act_fn):
  class CrossEntropy (line 110) | class CrossEntropy(ObjectiveBase):
    method __init__ (line 111) | def __init__(self):
    method __call__ (line 126) | def __call__(self, y, y_pred):
    method __str__ (line 129) | def __str__(self):
    method loss (line 133) | def loss(y, y_pred):
    method grad (line 170) | def grad(y, y_pred):
  class VAELoss (line 225) | class VAELoss(ObjectiveBase):
    method __init__ (line 226) | def __init__(self):
    method __call__ (line 254) | def __call__(self, y, y_pred, t_mean, t_log_var):
    method __str__ (line 257) | def __str__(self):
    method loss (line 261) | def loss(y, y_pred, t_mean, t_log_var):
    method grad (line 296) | def grad(y, y_pred, t_mean, t_log_var):
  class WGAN_GPLoss (line 331) | class WGAN_GPLoss(ObjectiveBase):
    method __init__ (line 332) | def __init__(self, lambda_=10):
    method __call__ (line 385) | def __call__(self, Y_fake, module, Y_real=None, gradInterp=None):
    method __str__ (line 412) | def __str__(self):
    method loss (line 415) | def loss(self, Y_fake, module, Y_real=None, gradInterp=None):
    method grad (line 457) | def grad(self, Y_fake, module, Y_real=None, gradInterp=None):
  class NCELoss (line 514) | class NCELoss(ObjectiveBase):
    method __init__ (line 518) | def __init__(
    method _init_params (line 621) | def _init_params(self):
    method hyperparameters (line 648) | def hyperparameters(self):
    method __call__ (line 663) | def __call__(self, X, target, neg_samples=None, retain_derived=True):
    method __str__ (line 666) | def __str__(self):
    method freeze (line 674) | def freeze(self):
    method unfreeze (line 681) | def unfreeze(self):
    method flush_gradients (line 685) | def flush_gradients(self):
    method update (line 695) | def update(self, cur_loss=None):
    method loss (line 707) | def loss(self, X, target, neg_samples=None, retain_derived=True):
    method _loss (line 760) | def _loss(self, X, target, neg_samples):
    method grad (line 818) | def grad(self, retain_grads=True, update_params=True):
    method _grad (line 858) | def _grad(self, X, input_idx):

FILE: numpy_ml/neural_nets/models/vae.py
  class BernoulliVAE (line 12) | class BernoulliVAE(object):
    method __init__ (line 13) | def __init__(
    method _init_params (line 137) | def _init_params(self):
    method _build_encoder (line 142) | def _build_encoder(self):
    method _build_decoder (line 191) | def _build_decoder(self):
    method parameters (line 211) | def parameters(self):
    method hyperparameters (line 220) | def hyperparameters(self):
    method derived_variables (line 250) | def derived_variables(self):
    method gradients (line 275) | def gradients(self):
    method _sample (line 283) | def _sample(self, t_mean, t_log_var):
    method forward (line 308) | def forward(self, X_train):
    method backward (line 341) | def backward(self, X_train, X_recon):
    method update (line 387) | def update(self, cur_loss=None):
    method flush_gradients (line 395) | def flush_gradients(self):
    method fit (line 402) | def fit(self, X_train, n_epochs=20, batchsize=128, verbose=True):

FILE: numpy_ml/neural_nets/models/w2v.py
  class Word2Vec (line 12) | class Word2Vec(object):
    method __init__ (line 13) | def __init__(
    method _init_params (line 125) | def _init_params(self):
    method parameters (line 147) | def parameters(self):
    method hyperparameters (line 158) | def hyperparameters(self):
    method derived_variables (line 183) | def derived_variables(self):
    method gradients (line 196) | def gradients(self):
    method forward (line 206) | def forward(self, X, targets, retain_derived=True):
    method backward (line 235) | def backward(self):
    method update (line 242) | def update(self, cur_loss=None):
    method flush_gradients (line 248) | def flush_gradients(self):
    method get_embedding (line 253) | def get_embedding(self, word_ids):
    method _build_noise_distribution (line 271) | def _build_noise_distribution(self):
    method _train_epoch (line 296) | def _train_epoch(self, corpus_fps, encoding):
    method _train_batch (line 308) | def _train_batch(self, X, target):
    method minibatcher (line 314) | def minibatcher(self, corpus_fps, encoding):
    method fit (line 399) | def fit(

FILE: numpy_ml/neural_nets/models/wgan_gp.py
  class WGAN_GP (line 11) | class WGAN_GP(object):
    method __init__ (line 66) | def __init__(
    method _init_params (line 104) | def _init_params(self):
    method _build_generator (line 111) | def _build_generator(self):
    method _build_critic (line 132) | def _build_critic(self):
    method hyperparameters (line 154) | def hyperparameters(self):
    method parameters (line 170) | def parameters(self):
    method derived_variables (line 179) | def derived_variables(self):
    method gradients (line 192) | def gradients(self):
    method forward (line 206) | def forward(self, X, module, retain_derived=True):
    method backward (line 246) | def backward(self, grad, module, retain_grads=True):
    method _dGradInterp (line 284) | def _dGradInterp(self, dLdGradInterp, dYi_outs):
    method update_critic (line 304) | def update_critic(self, X_real):
    method update_generator (line 395) | def update_generator(self, X_shape):
    method flush_gradients (line 424) | def flush_gradients(self, module):
    method update (line 436) | def update(self, module, module_loss=None):
    method fit (line 449) | def fit(

FILE: numpy_ml/neural_nets/modules/modules.py
  class ModuleBase (line 21) | class ModuleBase(ABC):
    method __init__ (line 22) | def __init__(self):
    method _init_params (line 29) | def _init_params(self, **kwargs):
    method forward (line 33) | def forward(self, z, **kwargs):
    method backward (line 37) | def backward(self, out, **kwargs):
    method components (line 41) | def components(self):
    method freeze (line 48) | def freeze(self):
    method unfreeze (line 53) | def unfreeze(self):
    method update (line 58) | def update(self, cur_loss=None):
    method flush_gradients (line 64) | def flush_gradients(self):
    method set_params (line 76) | def set_params(self, summary_dict):
    method summary (line 111) | def summary(self):
  class WavenetResidualModule (line 119) | class WavenetResidualModule(ModuleBase):
    method __init__ (line 120) | def __init__(
    method _init_params (line 183) | def _init_params(self):
    method parameters (line 216) | def parameters(self):
    method hyperparameters (line 229) | def hyperparameters(self):
    method derived_variables (line 256) | def derived_variables(self):
    method gradients (line 275) | def gradients(self):
    method forward (line 287) | def forward(self, X_main, X_skip=None):
    method backward (line 330) | def backward(self, dY_skip, dY_main=None):
  class SkipConnectionIdentityModule (line 360) | class SkipConnectionIdentityModule(ModuleBase):
    method __init__ (line 361) | def __init__(
    method _init_params (line 445) | def _init_params(self):
    method _init_conv2 (line 463) | def _init_conv2(self):
    method parameters (line 475) | def parameters(self):
    method hyperparameters (line 488) | def hyperparameters(self):
    method derived_variables (line 514) | def derived_variables(self):
    method gradients (line 534) | def gradients(self):
    method forward (line 546) | def forward(self, X, retain_derived=True):
    method backward (line 583) | def backward(self, dLdY, retain_grads=True):
  class SkipConnectionConvModule (line 615) | class SkipConnectionConvModule(ModuleBase):
    method __init__ (line 616) | def __init__(
    method _init_params (line 728) | def _init_params(self, X=None):
    method _calc_skip_padding (line 755) | def _calc_skip_padding(self, X):
    method _init_conv_skip (line 790) | def _init_conv_skip(self, X):
    method parameters (line 803) | def parameters(self):
    method hyperparameters (line 820) | def hyperparameters(self):
    method derived_variables (line 863) | def derived_variables(self):
    method gradients (line 889) | def gradients(self):
    method forward (line 905) | def forward(self, X, retain_derived=True):
    method backward (line 948) | def backward(self, dLdY, retain_grads=True):
  class BidirectionalLSTM (line 987) | class BidirectionalLSTM(ModuleBase):
    method __init__ (line 988) | def __init__(
    method _init_params (line 1033) | def _init_params(self):
    method forward (line 1049) | def forward(self, X):
    method backward (line 1092) | def backward(self, dLdA):
    method derived_variables (line 1143) | def derived_variables(self):
    method gradients (line 1154) | def gradients(self):
    method parameters (line 1164) | def parameters(self):
    method hyperparameters (line 1174) | def hyperparameters(self):
  class MultiHeadedAttentionModule (line 1192) | class MultiHeadedAttentionModule(ModuleBase):
    method __init__ (line 1193) | def __init__(self, n_heads=8, dropout_p=0, init="glorot_uniform", opti...
    method _init_params (line 1290) | def _init_params(self):
    method forward (line 1313) | def forward(self, Q, K, V):
    method backward (line 1343) | def backward(self, dLdy):
    method derived_variables (line 1365) | def derived_variables(self):
    method gradients (line 1384) | def gradients(self):
    method parameters (line 1397) | def parameters(self):
    method hyperparameters (line 1410) | def hyperparameters(self):

FILE: numpy_ml/neural_nets/optimizers/optimizers.py
  class OptimizerBase (line 8) | class OptimizerBase(ABC):
    method __init__ (line 9) | def __init__(self, lr, scheduler=None):
    method __call__ (line 22) | def __call__(self, param, param_grad, param_name, cur_loss=None):
    method step (line 25) | def step(self):
    method reset_step (line 29) | def reset_step(self):
    method copy (line 33) | def copy(self):
    method set_params (line 37) | def set_params(self, hparam_dict=None, cache_dict=None):
    method update (line 54) | def update(self, param, param_grad, param_name, cur_loss=None):
  class SGD (line 58) | class SGD(OptimizerBase):
    method __init__ (line 59) | def __init__(
    method __str__ (line 103) | def __str__(self):
    method update (line 110) | def update(self, param, param_grad, param_name, cur_loss=None):
  class AdaGrad (line 156) | class AdaGrad(OptimizerBase):
    method __init__ (line 157) | def __init__(self, lr=0.01, eps=1e-7, clip_norm=None, lr_scheduler=Non...
    method __str__ (line 208) | def __str__(self):
    method update (line 215) | def update(self, param, param_grad, param_name, cur_loss=None):
  class RMSProp (line 262) | class RMSProp(OptimizerBase):
    method __init__ (line 263) | def __init__(
    method __str__ (line 314) | def __str__(self):
    method update (line 322) | def update(self, param, param_grad, param_name, cur_loss=None):
  class Adam (line 364) | class Adam(OptimizerBase):
    method __init__ (line 365) | def __init__(
    method __str__ (line 419) | def __str__(self):
    method update (line 427) | def update(self, param, param_grad, param_name, cur_loss=None):

FILE: numpy_ml/neural_nets/schedulers/schedulers.py
  function gaussian_cdf (line 9) | def gaussian_cdf(x, mean, var):
  class SchedulerBase (line 19) | class SchedulerBase(ABC):
    method __init__ (line 20) | def __init__(self):
    method __call__ (line 24) | def __call__(self, step=None, cur_loss=None):
    method copy (line 27) | def copy(self):
    method set_params (line 31) | def set_params(self, hparam_dict):
    method learning_rate (line 39) | def learning_rate(self, step=None):
  class ConstantScheduler (line 43) | class ConstantScheduler(SchedulerBase):
    method __init__ (line 44) | def __init__(self, lr=0.01, **kwargs):
    method __str__ (line 57) | def __str__(self):
    method learning_rate (line 60) | def learning_rate(self, **kwargs):
  class ExponentialScheduler (line 72) | class ExponentialScheduler(SchedulerBase):
    method __init__ (line 73) | def __init__(
    method __str__ (line 119) | def __str__(self):
    method learning_rate (line 124) | def learning_rate(self, step, **kwargs):
  class NoamScheduler (line 144) | class NoamScheduler(SchedulerBase):
    method __init__ (line 145) | def __init__(self, model_dim=512, scale_factor=1, warmup_steps=4000, *...
    method __str__ (line 187) | def __str__(self):
    method learning_rate (line 192) | def learning_rate(self, step, **kwargs):
  class KingScheduler (line 198) | class KingScheduler(SchedulerBase):
    method __init__ (line 199) | def __init__(self, initial_lr=0.01, patience=1000, decay=0.99, **kwargs):
    method __str__ (line 241) | def __str__(self):
    method _steps_without_decrease (line 246) | def _steps_without_decrease(self, robust=False, check_all=False):
    method _p_decreasing (line 289) | def _p_decreasing(self, loss_history, i):
    method learning_rate (line 325) | def learning_rate(self, step, cur_loss):

FILE: numpy_ml/neural_nets/utils/utils.py
  function minibatch (line 8) | def minibatch(X, batchsize=256, shuffle=True):
  function calc_pad_dims_2D (line 51) | def calc_pad_dims_2D(X_shape, out_dim, kernel_shape, stride, dilation=0):
  function calc_pad_dims_1D (line 123) | def calc_pad_dims_1D(X_shape, l_out, kernel_width, stride, dilation=0, c...
  function pad1D (line 195) | def pad1D(X, pad, kernel_width=None, stride=None, dilation=0):
  function pad2D (line 252) | def pad2D(X, pad, kernel_shape=None, stride=None, dilation=0):
  function dilate (line 309) | def dilate(X, d):
  function calc_fan (line 352) | def calc_fan(weight_shape):
  function calc_conv_out_dims (line 380) | def calc_conv_out_dims(X_shape, W_shape, stride=1, pad=0, dilation=0):
  function _im2col_indices (line 447) | def _im2col_indices(X_shape, fr, fc, p, s, d=0):
  function im2col (line 486) | def im2col(X, W_shape, pad, stride, dilation=0):
  function col2im (line 546) | def col2im(X_col, X_shape, W_shape, pad, stride, dilation=0):
  function conv2D (line 603) | def conv2D(X, W, stride, pad, dilation=0):
  function conv1D (line 668) | def conv1D(X, W, stride, pad, dilation=0):
  function deconv2D_naive (line 720) | def deconv2D_naive(X, W, stride, pad, dilation=0):
  function conv2D_naive (line 797) | def conv2D_naive(X, W, stride, pad, dilation=0):
  function he_uniform (line 864) | def he_uniform(weight_shape):
  function he_normal (line 895) | def he_normal(weight_shape):
  function glorot_uniform (line 926) | def glorot_uniform(weight_shape, gain=1.0):
  function glorot_normal (line 962) | def glorot_normal(weight_shape, gain=1.0):
  function truncated_normal (line 998) | def truncated_normal(mean, std, out_shape):

FILE: numpy_ml/neural_nets/wrappers/wrappers.py
  class WrapperBase (line 11) | class WrapperBase(ABC):
    method __init__ (line 12) | def __init__(self, wrapped_layer):
    method _init_wrapper_params (line 20) | def _init_wrapper_params(self):
    method forward (line 24) | def forward(self, z, **kwargs):
    method backward (line 29) | def backward(self, out, **kwargs):
    method trainable (line 34) | def trainable(self):
    method parameters (line 39) | def parameters(self):
    method hyperparameters (line 44) | def hyperparameters(self):
    method derived_variables (line 55) | def derived_variables(self):
    method gradients (line 68) | def gradients(self):
    method act_fn (line 73) | def act_fn(self):
    method X (line 78) | def X(self):
    method _init_params (line 82) | def _init_params(self):
    method freeze (line 89) | def freeze(self):
    method unfreeze (line 96) | def unfreeze(self):
    method flush_gradients (line 100) | def flush_gradients(self):
    method update (line 108) | def update(self, lr):
    method _set_wrapper_params (line 117) | def _set_wrapper_params(self, pdict):
    method set_params (line 123) | def set_params(self, summary_dict):
    method summary (line 142) | def summary(self):
  class Dropout (line 152) | class Dropout(WrapperBase):
    method __init__ (line 153) | def __init__(self, wrapped_layer, p):
    method _init_wrapper_params (line 177) | def _init_wrapper_params(self):
    method forward (line 181) | def forward(self, X, retain_derived=True):
    method backward (line 212) | def backward(self, dLdy, retain_grads=True):
  function init_wrappers (line 235) | def init_wrappers(layer, wrappers_list):

FILE: numpy_ml/ngram/ngram.py
  class NGramBase (line 12) | class NGramBase(ABC):
    method __init__ (line 13) | def __init__(self, N, unk=True, filter_stopwords=True, filter_punctuat...
    method train (line 38) | def train(self, corpus_fp, vocab=None, encoding=None):
    method _train (line 62) | def _train(self, corpus_fp, vocab=None, encoding=None):
    method completions (line 105) | def completions(self, words, N):
    method generate (line 135) | def generate(self, N, seed_words=["<bol>"], n_sentences=5):
    method perplexity (line 178) | def perplexity(self, words, N):
    method cross_entropy (line 220) | def cross_entropy(self, words, N):
    method _log_prob (line 255) | def _log_prob(self, words, N):
    method _n_completions (line 271) | def _n_completions(self, words, N):
    method _num_grams_with_count (line 285) | def _num_grams_with_count(self, C, N):
    method log_prob (line 300) | def log_prob(self, words, N):
    method _log_ngram_prob (line 308) | def _log_ngram_prob(self, ngram):
  class MLENGram (line 313) | class MLENGram(NGramBase):
    method __init__ (line 314) | def __init__(self, N, unk=True, filter_stopwords=True, filter_punctuat...
    method log_prob (line 335) | def log_prob(self, words, N):
    method _log_ngram_prob (line 356) | def _log_ngram_prob(self, ngram):
  class AdditiveNGram (line 364) | class AdditiveNGram(NGramBase):
    method __init__ (line 365) | def __init__(
    method log_prob (line 402) | def log_prob(self, words, N):
    method _log_ngram_prob (line 446) | def _log_ngram_prob(self, ngram):
  class GoodTuringNGram (line 459) | class GoodTuringNGram(NGramBase):
    method __init__ (line 460) | def __init__(
    method train (line 490) | def train(self, corpus_fp, vocab=None, encoding=None):
    method log_prob (line 512) | def log_prob(self, words, N):
    method _calc_smoothed_counts (line 571) | def _calc_smoothed_counts(self):
    method _log_ngram_prob (line 629) | def _log_ngram_prob(self, ngram):
    method _fit_count_models (line 644) | def _fit_count_models(self):

FILE: numpy_ml/nonparametric/gp.py
  class GPRegression (line 18) | class GPRegression:
    method __init__ (line 19) | def __init__(self, kernel="RBFKernel", alpha=1e-10):
    method fit (line 44) | def fit(self, X, y):
    method predict (line 64) | def predict(self, X, conf_interval=0.95, return_cov=False):
    method marginal_log_likelihood (line 156) | def marginal_log_likelihood(self, kernel_params=None):
    method sample (line 223) | def sample(self, X, n_samples=1, dist="posterior_predictive"):

FILE: numpy_ml/nonparametric/kernel_regression.py
  class KernelRegression (line 4) | class KernelRegression:
    method __init__ (line 5) | def __init__(self, kernel=None):
    method fit (line 40) | def fit(self, X, y):
    method predict (line 53) | def predict(self, X):

FILE: numpy_ml/nonparametric/knn.py
  class KNN (line 9) | class KNN:
    method __init__ (line 10) | def __init__(
    method fit (line 47) | def fit(self, X, y):
    method predict (line 62) | def predict(self, X):

FILE: numpy_ml/plots/bandit_plots.py
  function random_multinomial_mab (line 23) | def random_multinomial_mab(n_arms=10, n_choices_per_arm=5, reward_range=...
  function random_bernoulli_mab (line 39) | def random_bernoulli_mab(n_arms=10):
  function plot_epsilon_greedy_multinomial_payoff (line 46) | def plot_epsilon_greedy_multinomial_payoff():
  function plot_ucb1_multinomial_payoff (line 65) | def plot_ucb1_multinomial_payoff():
  function plot_thompson_sampling_beta_binomial_payoff (line 82) | def plot_thompson_sampling_beta_binomial_payoff():
  function plot_lin_ucb (line 99) | def plot_lin_ucb():
  function plot_ucb1_gaussian_shortest_path (line 114) | def plot_ucb1_gaussian_shortest_path():
  function plot_comparison (line 151) | def plot_comparison():

FILE: numpy_ml/plots/gmm_plots.py
  function plot_countour (line 20) | def plot_countour(X, x, y, z, ax, xlim, ylim):
  function plot_clusters (line 41) | def plot_clusters(model, X, ax):
  function plot (line 72) | def plot():

FILE: numpy_ml/plots/hmm_plots.py
  function generate_training_data (line 16) | def generate_training_data(params, n_steps=500, n_examples=15):
  function default_hmm (line 32) | def default_hmm():
  function plot_matrices (line 58) | def plot_matrices(params, best, best_theirs):
  function test_HMM (line 130) | def test_HMM():

FILE: numpy_ml/plots/lda_plots.py
  function generate_corpus (line 16) | def generate_corpus():
  function plot_unsmoothed (line 62) | def plot_unsmoothed():

FILE: numpy_ml/plots/lm_plots.py
  function random_binary_tensor (line 33) | def random_binary_tensor(shape, sparsity=0.5):
  function random_regression_problem (line 38) | def random_regression_problem(n_ex, n_in, n_out, intercept=0, std=1, see...
  function random_classification_problem (line 54) | def random_classification_problem(n_ex, n_classes, n_in, seed=0):
  function plot_logistic (line 69) | def plot_logistic():
  function plot_bayes (line 112) | def plot_bayes():
  function plot_regression (line 219) | def plot_regression():

FILE: numpy_ml/plots/ngram_plots.py
  function plot_count_models (line 15) | def plot_count_models(GT, N):
  function compare_probs (line 31) | def compare_probs(fp, N):
  function plot_gt_freqs (line 80) | def plot_gt_freqs(fp):

FILE: numpy_ml/plots/nn_activations_plots.py
  function plot_activations (line 25) | def plot_activations():

FILE: numpy_ml/plots/nn_schedulers_plots.py
  function king_loss_fn (line 21) | def king_loss_fn(x):
  function plot_schedulers (line 32) | def plot_schedulers():

FILE: numpy_ml/plots/nonparametric_plots.py
  function random_regression_problem (line 18) | def random_regression_problem(n_ex, n_in, n_out, d=3, intercept=0, std=1...
  function plot_regression (line 35) | def plot_regression():
  function plot_knn (line 104) | def plot_knn():
  function plot_gp (line 166) | def plot_gp():
  function plot_gp_dist (line 196) | def plot_gp_dist():

FILE: numpy_ml/plots/rl_plots.py
  function test_cross_entropy_agent (line 13) | def test_cross_entropy_agent():
  function test_monte_carlo_agent (line 28) | def test_monte_carlo_agent():
  function test_temporal_difference_agent (line 52) | def test_temporal_difference_agent():
  function test_dyna_agent (line 92) | def test_dyna_agent():

FILE: numpy_ml/plots/trees_plots.py
  function plot (line 20) | def plot():

FILE: numpy_ml/preprocessing/dsp.py
  function batch_resample (line 11) | def batch_resample(X, new_dim, mode="bilinear"):
  function nn_interpolate_2D (line 53) | def nn_interpolate_2D(X, x, y):
  function nn_interpolate_1D (line 84) | def nn_interpolate_1D(X, t):
  function bilinear_interpolate (line 106) | def bilinear_interpolate(X, x, y):
  function DCT (line 161) | def DCT(frame, orthonormal=True):
  function __DCT2 (line 212) | def __DCT2(frame):
  function DFT (line 224) | def DFT(frame, positive_only=True):
  function dft_bins (line 276) | def dft_bins(N, fs=44000, positive_only=True):
  function magnitude_spectrum (line 304) | def magnitude_spectrum(frames):
  function power_spectrum (line 323) | def power_spectrum(frames, scale=False):
  function to_frames (line 354) | def to_frames(x, frame_width, stride, writeable=False):
  function autocorrelate1D (line 416) | def autocorrelate1D(x):
  function preemphasis (line 458) | def preemphasis(x, alpha):
  function cepstral_lifter (line 489) | def cepstral_lifter(mfccs, D):
  function mel_spectrogram (line 523) | def mel_spectrogram(
  function mfcc (line 613) | def mfcc(
  function mel2hz (line 719) | def mel2hz(mel, formula="htk"):
  function hz2mel (line 745) | def hz2mel(hz, formula="htk"):
  function mel_filterbank (line 772) | def mel_filterbank(

FILE: numpy_ml/preprocessing/general.py
  function minibatch (line 16) | def minibatch(X, batchsize=256, shuffle=True):
  class OneHotEncoder (line 54) | class OneHotEncoder:
    method __init__ (line 55) | def __init__(self):
    method __call__ (line 69) | def __call__(self, labels):
    method fit (line 72) | def fit(self, categories):
    method transform (line 86) | def transform(self, labels, categories=None):
    method inverse_transform (line 119) | def inverse_transform(self, Y):
  class Standardizer (line 141) | class Standardizer:
    method __init__ (line 142) | def __init__(self, with_mean=True, with_std=True):
    method hyperparameters (line 172) | def hyperparameters(self):
    method parameters (line 177) | def parameters(self):
    method __call__ (line 184) | def __call__(self, X):
    method fit (line 187) | def fit(self, X):
    method transform (line 216) | def transform(self, X):
    method inverse_transform (line 244) | def inverse_transform(self, Z):
  class FeatureHasher (line 275) | class FeatureHasher:
    method __init__ (line 276) | def __init__(self, n_dim=256, sparse=True):
    method encode (line 301) | def encode(self, examples):
    method _encode_dense (line 348) | def _encode_dense(self, examples):
    method _encode_sparse (line 368) | def _encode_sparse(self, examples):

FILE: numpy_ml/preprocessing/nlp.py
  function ngrams (line 58) | def ngrams(sequence, N):
  function tokenize_whitespace (line 64) | def tokenize_whitespace(
  function tokenize_words (line 77) | def tokenize_words(
  function tokenize_words_bytes (line 89) | def tokenize_words_bytes(
  function tokenize_bytes_raw (line 112) | def tokenize_bytes_raw(line, encoding="utf-8", splitter=None, **kwargs):
  function bytes_to_chars (line 141) | def bytes_to_chars(byte_list, encoding="utf-8"):
  function tokenize_chars (line 151) | def tokenize_chars(line, lowercase=True, filter_punctuation=True, **kwar...
  function remove_stop_words (line 162) | def remove_stop_words(words):
  function strip_punctuation (line 167) | def strip_punctuation(line):
  class BytePairEncoder (line 177) | class BytePairEncoder(object):
    method __init__ (line 178) | def __init__(self, max_merges=3000, encoding="utf-8"):
    method fit (line 220) | def fit(self, corpus_fps, encoding="utf-8"):
    method _get_counts (line 264) | def _get_counts(self, vocab):
    method _merge (line 273) | def _merge(self, bigram, vocab):
    method transform (line 284) | def transform(self, text):
    method _transform (line 310) | def _transform(self, text):
    method inverse_transform (line 339) | def inverse_transform(self, codes):
    method codebook (line 377) | def codebook(self):
    method tokens (line 389) | def tokens(self):
  class Node (line 399) | class Node(object):
    method __init__ (line 400) | def __init__(self, key, val):
    method __gt__ (line 406) | def __gt__(self, other):
    method __ge__ (line 412) | def __ge__(self, other):
    method __lt__ (line 418) | def __lt__(self, other):
    method __le__ (line 424) | def __le__(self, other):
  class HuffmanEncoder (line 431) | class HuffmanEncoder(object):
    method fit (line 432) | def fit(self, text):
    method transform (line 463) | def transform(self, text):
    method inverse_transform (line 484) | def inverse_transform(self, codes):
    method tokens (line 506) | def tokens(self):
    method codes (line 511) | def codes(self):
    method _counter (line 515) | def _counter(self, text):
    method _build_tree (line 521) | def _build_tree(self, text):
    method _generate_codes (line 548) | def _generate_codes(self):
    method _build_code (line 554) | def _build_code(self, root, current_code):
  class Token (line 573) | class Token:
    method __init__ (line 574) | def __init__(self, word):
    method __repr__ (line 578) | def __repr__(self):
  class TFIDFEncoder (line 583) | class TFIDFEncoder:
    method __init__ (line 584) | def __init__(
    method fit (line 691) | def fit(self, corpus_seq, encoding="utf-8-sig"):
    method _encode_document (line 769) | def _encode_document(
    method _keep_top_n_tokens (line 821) | def _keep_top_n_tokens(self):
    method _drop_low_freq_tokens (line 865) | def _drop_low_freq_tokens(self):
    method _sort_tokens (line 904) | def _sort_tokens(self):
    method _calc_idf (line 934) | def _calc_idf(self):
    method transform (line 964) | def transform(self, ignore_special_chars=True):
  class Vocabulary (line 1008) | class Vocabulary:
    method __init__ (line 1009) | def __init__(
    method __len__ (line 1060) | def __len__(self):
    method __iter__ (line 1064) | def __iter__(self):
    method __contains__ (line 1068) | def __contains__(self, word):
    method __getitem__ (line 1072) | def __getitem__(self, key):
    method n_tokens (line 1083) | def n_tokens(self):
    method n_words (line 1088) | def n_words(self):
    method shape (line 1093) | def shape(self):
    method most_common (line 1097) | def most_common(self, n=5):
    method words_with_count (line 1101) | def words_with_count(self, k):
    method filter (line 1105) | def filter(self, words, unk=True):  # noqa: A003
    method words_to_indices (line 1128) | def words_to_indices(self, words):
    method indices_to_words (line 1148) | def indices_to_words(self, indices):
    method fit (line 1166) | def fit(self, corpus_fps, encoding="utf-8-sig"):
    method _keep_top_n_tokens (line 1264) | def _keep_top_n_tokens(self):
    method _drop_low_freq_tokens (line 1298) | def _drop_low_freq_tokens(self):

FILE: numpy_ml/rl_models/agents.py
  class AgentBase (line 12) | class AgentBase(ABC):
    method __init__ (line 13) | def __init__(self, env):
    method _create_2num_dicts (line 21) | def _create_2num_dicts(self, obs_encoder=None, act_encoder=None):
    method flush_history (line 40) | def flush_history(self):
    method act (line 46) | def act(self, obs):
    method greedy_policy (line 51) | def greedy_policy(self, **kwargs):
    method run_episode (line 65) | def run_episode(self, max_steps, render=False):
    method update (line 87) | def update(self):
  class CrossEntropyAgent (line 101) | class CrossEntropyAgent(AgentBase):
    method __init__ (line 102) | def __init__(self, env, n_samples_per_episode=500, retain_prcnt=0.2):
    method _init_params (line 164) | def _init_params(self):
    method act (line 195) | def act(self, obs):
    method run_episode (line 238) | def run_episode(self, max_steps, render=False):
    method _episode (line 278) | def _episode(self, W, b, max_steps, render):
    method update (line 328) | def update(self):
    method _sample_thetas (line 350) | def _sample_thetas(self):
    method greedy_policy (line 360) | def greedy_policy(self, max_steps, render=True):
  class MonteCarloAgent (line 391) | class MonteCarloAgent(AgentBase):
    method __init__ (line 392) | def __init__(self, env, off_policy=False, temporal_discount=0.9, epsil...
    method _init_params (line 422) | def _init_params(self):
    method _epsilon_soft_policy (line 458) | def _epsilon_soft_policy(self, s, a=None):
    method _greedy (line 529) | def _greedy(self, s, a=None):
    method _on_policy_update (line 562) | def _on_policy_update(self):
    method _off_policy_update (line 599) | def _off_policy_update(self):
    method act (line 642) | def act(self, obs):
    method run_episode (line 661) | def run_episode(self, max_steps, render=False):
    method _episode (line 685) | def _episode(self, max_steps, render):
    method update (line 732) | def update(self):
    method greedy_policy (line 745) | def greedy_policy(self, max_steps, render=True):
  class TemporalDifferenceAgent (line 793) | class TemporalDifferenceAgent(AgentBase):
    method __init__ (line 794) | def __init__(
    method _init_params (line 901) | def _init_params(self):
    method run_episode (line 945) | def run_episode(self, max_steps, render=False):
    method train_episode (line 967) | def train_episode(self, max_steps, render=False):
    method _episode (line 992) | def _episode(self, max_steps, render, update=True):
    method _epsilon_soft_policy (line 1054) | def _epsilon_soft_policy(self, s, a=None):
    method _greedy (line 1119) | def _greedy(self, s, a=None):
    method _on_policy_update (line 1155) | def _on_policy_update(self, s, a, r, s_, a_):
    method _off_policy_update (line 1198) | def _off_policy_update(self, s, a, r, s_):
    method update (line 1224) | def update(self):
    method act (line 1235) | def act(self, obs):
    method greedy_policy (line 1254) | def greedy_policy(self, max_steps, render=True):
  class DynaAgent (line 1304) | class DynaAgent(AgentBase):
    method __init__ (line 1305) | def __init__(
    method _init_params (line 1428) | def _init_params(self):
    method act (line 1480) | def act(self, obs):
    method _epsilon_soft_policy (line 1499) | def _epsilon_soft_policy(self, s, a=None):
    method _greedy (line 1564) | def _greedy(self, s, a=None):
    method update (line 1600) | def update(self):
    method _update_queue (line 1620) | def _update_queue(self, s, a):
    method _calc_priority (line 1642) | def _calc_priority(self, s, a):
    method _simulate_behavior (line 1678) | def _simulate_behavior(self):
    method _update (line 1717) | def _update(self, s, a):
    method run_episode (line 1752) | def run_episode(self, max_steps, render=False):
    method train_episode (line 1773) | def train_episode(self, max_steps, render=False):
    method _episode (line 1796) | def _episode(self, max_steps, render, update=True):
    method greedy_policy (line 1868) | def greedy_policy(self, max_steps, render=True):

FILE: numpy_ml/rl_models/rl_utils.py
  class EnvModel (line 28) | class EnvModel(object):
    method __init__ (line 44) | def __init__(self):
    method __setitem__ (line 48) | def __setitem__(self, key, value):
    method __getitem__ (line 53) | def __getitem__(self, key):
    method __contains__ (line 58) | def __contains__(self, key):
    method state_action_pairs (line 65) | def state_action_pairs(self):
    method reward_outcome_pairs (line 69) | def reward_outcome_pairs(self, s, a):
    method outcome_probs (line 76) | def outcome_probs(self, s, a):
    method state_action_pairs_leading_to_outcome (line 100) | def state_action_pairs_leading_to_outcome(self, outcome):
  function tile_state_space (line 124) | def tile_state_space(
  function get_gym_environs (line 200) | def get_gym_environs():
  function get_gym_stats (line 205) | def get_gym_stats():
  function is_tuple (line 231) | def is_tuple(env):
  function is_multidimensional (line 257) | def is_multidimensional(env):
  function is_continuous (line 297) | def is_continuous(env, tuple_action, tuple_obs):
  function action_stats (line 334) | def action_stats(env, md_action, cont_action):
  function obs_stats (line 382) | def obs_stats(env, md_obs, cont_obs):
  function env_stats (line 430) | def env_stats(env):

FILE: numpy_ml/rl_models/tiles/tiles3.py
  class IHT (line 36) | class IHT:
    method __init__ (line 39) | def __init__(self, sizeval):
    method __str__ (line 44) | def __str__(self):
    method count (line 57) | def count(self):
    method fullp (line 60) | def fullp(self):
    method getindex (line 63) | def getindex(self, obj, readonly=False):
  function hashcoords (line 81) | def hashcoords(coordinates, m, readonly=False):
  function tiles (line 90) | def tiles(ihtORsize, numtilings, floats, ints=[], readonly=False):
  function tileswrap (line 106) | def tileswrap(ihtORsize, numtilings, floats, wrapwidths, ints=[], readon...

FILE: numpy_ml/rl_models/trainer.py
  class Trainer (line 5) | class Trainer(object):
    method __init__ (line 6) | def __init__(self, agent, env):
    method _train_episode (line 21) | def _train_episode(self, max_steps, render_every=None):
    method train (line 33) | def train(
    method plot_rewards (line 103) | def plot_rewards(self, rwd_greedy):

FILE: numpy_ml/tests/nn_torch_models.py
  function torchify (line 17) | def torchify(var, requires_grad=True):
  function torch_gradient_generator (line 21) | def torch_gradient_generator(fn, **kwargs):
  function torch_xe_grad (line 32) | def torch_xe_grad(y, z):
  function torch_mse_grad (line 41) | def torch_mse_grad(y, z, act_fn):
  class TorchVAELoss (line 51) | class TorchVAELoss(nn.Module):
    method __init__ (line 52) | def __init__(self):
    method extract_grads (line 55) | def extract_grads(self, X, X_recon, t_mean, t_log_var):
  class TorchWGANGPLoss (line 82) | class TorchWGANGPLoss(nn.Module):
    method __init__ (line 83) | def __init__(self, lambda_=10):
    method forward (line 87) | def forward(self, Y_real, Y_fake, gradInterp):
    method extract_grads (line 103) | def extract_grads(self, Y_real, Y_fake, gradInterp):
  class TorchLinearActivation (line 124) | class TorchLinearActivation(nn.Module):
    method __init__ (line 125) | def __init__(self):
    method forward (line 130) | def forward(input):
    method backward (line 134) | def backward(grad_output):
  class TorchBatchNormLayer (line 138) | class TorchBatchNormLayer(nn.Module):
    method __init__ (line 139) | def __init__(self, n_in, params, mode, momentum=0.9, epsilon=1e-5):
    method forward (line 157) | def forward(self, X):
    method extract_grads (line 169) | def extract_grads(self, X, Y_true=None):
  class TorchLayerNormLayer (line 216) | class TorchLayerNormLayer(nn.Module):
    method __init__ (line 217) | def __init__(self, feat_dims, params, mode, epsilon=1e-5):
    method forward (line 236) | def forward(self, X):
    method extract_grads (line 248) | def extract_grads(self, X, Y_true=None):
  class TorchAddLayer (line 301) | class TorchAddLayer(nn.Module):
    method __init__ (line 302) | def __init__(self, act_fn, **kwargs):
    method forward (line 306) | def forward(self, Xs):
    method extract_grads (line 330) | def extract_grads(self, X):
  class TorchMultiplyLayer (line 347) | class TorchMultiplyLayer(nn.Module):
    method __init__ (line 348) | def __init__(self, act_fn, **kwargs):
    method forward (line 352) | def forward(self, Xs):
    method extract_grads (line 376) | def extract_grads(self, X):
  class TorchSkipConnectionIdentity (line 393) | class TorchSkipConnectionIdentity(nn.Module):
    method __init__ (line 394) | def __init__(self, act_fn, pad1, pad2, params, hparams, momentum=0.9, ...
    method forward (line 460) | def forward(self, X):
    method extract_grads (line 490) | def extract_grads(self, X):
  class TorchCausalConv1d (line 541) | class TorchCausalConv1d(torch.nn.Conv1d):
    method __init__ (line 548) | def __init__(
    method forward (line 571) | def forward(self, input):
  class TorchWavenetModule (line 578) | class TorchWavenetModule(nn.Module):
    method __init__ (line 579) | def __init__(self, params, hparams, conv_1x1_pad):
    method forward (line 618) | def forward(self, X_main, X_skip):
    method extract_grads (line 650) | def extract_grads(self, X_main, X_skip):
  class TorchSkipConnectionConv (line 715) | class TorchSkipConnectionConv(nn.Module):
    method __init__ (line 716) | def __init__(
    method forward (line 813) | def forward(self, X):
    method extract_grads (line 849) | def extract_grads(self, X):
  class TorchBidirectionalLSTM (line 920) | class TorchBidirectionalLSTM(nn.Module):
    method __init__ (line 921) | def __init__(self, n_in, n_out, params, **kwargs):
    method forward (line 1000) | def forward(self, X):
    method extract_grads (line 1018) | def extract_grads(self, X):
  class TorchPool2DLayer (line 1103) | class TorchPool2DLayer(nn.Module):
    method __init__ (line 1104) | def __init__(self, in_channels, hparams, **kwargs):
    method forward (line 1120) | def forward(self, X):
    method extract_grads (line 1131) | def extract_grads(self, X):
  class TorchConv2DLayer (line 1149) | class TorchConv2DLayer(nn.Module):
    method __init__ (line 1150) | def __init__(self, in_channels, out_channels, act_fn, params, hparams,...
    method forward (line 1175) | def forward(self, X):
    method extract_grads (line 1190) | def extract_grads(self, X):
  class TorchConv1DLayer (line 1213) | class TorchConv1DLayer(nn.Module):
    method __init__ (line 1214) | def __init__(self, in_channels, out_channels, act_fn, params, hparams,...
    method forward (line 1239) | def forward(self, X):
    method extract_grads (line 1254) | def extract_grads(self, X):
  class TorchDeconv2DLayer (line 1277) | class TorchDeconv2DLayer(nn.Module):
    method __init__ (line 1278) | def __init__(self, in_channels, out_channels, act_fn, params, hparams,...
    method forward (line 1303) | def forward(self, X):
    method extract_grads (line 1318) | def extract_grads(self, X):
  class TorchLSTMCell (line 1341) | class TorchLSTMCell(nn.Module):
    method __init__ (line 1342) | def __init__(self, n_in, n_out, params, **kwargs):
    method forward (line 1371) | def forward(self, X):
    method extract_grads (line 1411) | def extract_grads(self, X):
  class TorchRNNCell (line 1462) | class TorchRNNCell(nn.Module):
    method __init__ (line 1463) | def __init__(self, n_in, n_hid, params, **kwargs):
    method forward (line 1477) | def forward(self, X):
    method extract_grads (line 1508) | def extract_grads(self, X):
  class TorchFCLayer (line 1529) | class TorchFCLayer(nn.Module):
    method __init__ (line 1530) | def __init__(self, n_in, n_hid, act_fn, params, **kwargs):
    method forward (line 1544) | def forward(self, X):
    method extract_grads (line 1555) | def extract_grads(self, X):
  class TorchEmbeddingLayer (line 1573) | class TorchEmbeddingLayer(nn.Module):
    method __init__ (line 1574) | def __init__(self, vocab_size, n_out, params, **kwargs):
    method forward (line 1582) | def forward(self, X):
    method extract_grads (line 1590) | def extract_grads(self, X):
  class TorchSDPAttentionLayer (line 1604) | class TorchSDPAttentionLayer(nn.Module):
    method __init__ (line 1605) | def __init__(self):
    method forward (line 1608) | def forward(self, Q, K, V, mask=None):
    method extract_grads (line 1636) | def extract_grads(self, Q, K, V, mask=None):
  class TorchMultiHeadedAttentionModule (line 1657) | class TorchMultiHeadedAttentionModule(nn.Module):
    method __init__ (line 1658) | def __init__(self, params, hparams):
    method forward (line 1699) | def forward(self, Q, K, V, mask=None):
    method extract_grads (line 1761) | def extract_grads(self, Q, K, V, mask=None):
  function param (line 1816) | def param(name, *args, **kwargs):
  function params_with_name (line 1842) | def params_with_name(name):
  function ReLULayer (line 1846) | def ReLULayer(name, n_in, n_out, inputs, w_initialization):
  function LinearLayer (line 1859) | def LinearLayer(name, n_in, n_out, inputs, w_initialization):
  function Generator (line 1871) | def Generator(n_samples, X_real, params=None):
  function Discriminator (line 1901) | def Discriminator(inputs, params=None):
  function WGAN_GP_tf (line 1944) | def WGAN_GP_tf(X, lambda_, params, batch_size):
  function TFNCELoss (line 2192) | def TFNCELoss(X, target_word, L):

FILE: numpy_ml/tests/test_glm.py
  function test_glm (line 10) | def test_glm(N=20):

FILE: numpy_ml/tests/test_linear_regression.py
  function test_linear_regression (line 10) | def test_linear_regression(N=10):

FILE: numpy_ml/tests/test_naive_bayes.py
  function test_GaussianNB (line 12) | def test_GaussianNB(N=10):

FILE: numpy_ml/tests/test_ngram.py
  class MLEGold (line 12) | class MLEGold:
    method __init__ (line 13) | def __init__(
    method train (line 30) | def train(self, corpus_fp, vocab=None, encoding=None):
    method log_prob (line 90) | def log_prob(self, words, N):
    method _log_ngram_prob (line 102) | def _log_ngram_prob(self, ngram):
  class AdditiveGold (line 107) | class AdditiveGold:
    method __init__ (line 108) | def __init__(
    method train (line 125) | def train(self, corpus_fp, vocab=None, encoding=None):
    method log_prob (line 185) | def log_prob(self, words, N):
    method _log_ngram_prob (line 197) | def _log_ngram_prob(self, ngram):
  function test_mle (line 202) | def test_mle():
  function test_additive (line 227) | def test_additive():

FILE: numpy_ml/tests/test_nn.py
  function err_fmt (line 64) | def err_fmt(params, golds, ix, warn_str=""):
  function test_squared_error (line 84) | def test_squared_error(N=15):
  function test_cross_entropy (line 117) | def test_cross_entropy(N=15):
  function test_VAE_loss (line 147) | def test_VAE_loss(N=15):
  function test_WGAN_GP_loss (line 189) | def test_WGAN_GP_loss(N=5):
  function test_NCELoss (line 241) | def test_NCELoss(N=1):
  function test_squared_error_grad (line 339) | def test_squared_error_grad(N=15):
  function test_cross_entropy_grad (line 368) | def test_cross_entropy_grad(N=15):
  function test_sigmoid_activation (line 401) | def test_sigmoid_activation(N=15):
  function test_elu_activation (line 420) | def test_elu_activation(N=15):
  function test_softmax_activation (line 442) | def test_softmax_activation(N=15):
  function test_relu_activation (line 461) | def test_relu_activation(N=15):
  function test_softplus_activation (line 480) | def test_softplus_activation(N=15):
  function test_sigmoid_grad (line 504) | def test_sigmoid_grad(N=15):
  function test_elu_grad (line 524) | def test_elu_grad(N=15):
  function test_tanh_grad (line 545) | def test_tanh_grad(N=15):
  function test_relu_grad (line 565) | def test_relu_grad(N=15):
  function test_softmax_grad (line 585) | def test_softmax_grad(N=15):
  function test_softplus_grad (line 615) | def test_softplus_grad(N=15):
  function test_FullyConnected (line 640) | def test_FullyConnected(N=15):
  function test_Embedding (line 699) | def test_Embedding(N=15):
  function test_BatchNorm1D (line 748) | def test_BatchNorm1D(N=15):
  function test_LayerNorm1D (line 800) | def test_LayerNorm1D(N=15):
  function test_LayerNorm2D (line 846) | def test_LayerNorm2D(N=15):
  function test_MultiplyLayer (line 899) | def test_MultiplyLayer(N=15):
  function test_AddLayer (line 955) | def test_AddLayer(N=15):
  function test_BatchNorm2D (line 1011) | def test_BatchNorm2D(N=15):
  function test_RNNCell (line 1067) | def test_RNNCell(N=15):
  function test_Conv2D (line 1130) | def test_Conv2D(N=15):
  function test_DPAttention (line 1216) | def test_DPAttention(N=15):
  function test_Conv1D (line 1267) | def test_Conv1D(N=15):
  function test_Deconv2D (line 1348) | def test_Deconv2D(N=15):
  function test_Pool2D (line 1430) | def test_Pool2D(N=15):
  function test_LSTMCell (line 1482) | def test_LSTMCell(N=15):
  function grad_check_RNN (line 1559) | def grad_check_RNN(model, loss_func, param_name, n_t, X, epsilon=1e-7):
  function test_MultiHeadedAttentionModule (line 1606) | def test_MultiHeadedAttentionModule(N=15):
  function test_SkipConnectionIdentityModule (line 1692) | def test_SkipConnectionIdentityModule(N=15):
  function test_SkipConnectionConvModule (line 1822) | def test_SkipConnectionConvModule(N=15):
  function test_BidirectionalLSTM (line 1990) | def test_BidirectionalLSTM(N=15):
  function test_WaveNetModule (line 2072) | def test_WaveNetModule(N=10):
  function test_pad1D (line 2174) | def test_pad1D(N=15):
  function test_conv (line 2273) | def test_conv(N=15):
  function fit_VAE (line 2307) | def fit_VAE():
  function test_WGAN_GP (line 2326) | def test_WGAN_GP(N=1):

FILE: numpy_ml/tests/test_nn_activations.py
  function torch_gradient_generator (line 14) | def torch_gradient_generator(fn, **kwargs):
  function err_fmt (line 30) | def err_fmt(params, golds, ix, warn_str=""):
  function test_sigmoid_activation (line 96) | def test_sigmoid_activation(N=50):
  function test_softplus_activation (line 113) | def test_softplus_activation(N=50):
  function test_elu_activation (line 130) | def test_elu_activation(N=50):
  function test_relu_activation (line 150) | def test_relu_activation(N=50):
  function test_selu_activation (line 167) | def test_selu_activation(N=50):
  function test_leakyrelu_activation (line 184) | def test_leakyrelu_activation(N=50):
  function test_gelu_activation (line 203) | def test_gelu_activation(N=50):
  function test_sigmoid_grad (line 229) | def test_sigmoid_grad(N=50):
  function test_elu_grad (line 247) | def test_elu_grad(N=50):
  function test_tanh_grad (line 266) | def test_tanh_grad(N=50):
  function test_relu_grad (line 284) | def test_relu_grad(N=50):
  function test_gelu_grad (line 302) | def test_gelu_grad(N=50):
  function test_selu_grad (line 322) | def test_selu_grad(N=50):
  function test_leakyrelu_grad (line 340) | def test_leakyrelu_grad(N=50):
  function test_softplus_grad (line 359) | def test_softplus_grad(N=50):

FILE: numpy_ml/tests/test_nonparametric.py
  function test_knn_regression (line 12) | def test_knn_regression(N=15):
  function test_knn_clf (line 50) | def test_knn_clf(N=15):
  function test_gp_regression (line 87) | def test_gp_regression(N=15):

FILE: numpy_ml/tests/test_preprocessing.py
  function test_huffman (line 36) | def test_huffman(N=15):
  function test_standardizer (line 56) | def test_standardizer(N=15):
  function test_tfidf (line 79) | def test_tfidf(N=15):
  function test_dct (line 119) | def test_dct(N=15):
  function test_dft (line 135) | def test_dft(N=15):
  function test_mfcc (line 150) | def test_mfcc(N=1):
  function test_framing (line 196) | def test_framing(N=15):
  function test_dft_bins (line 217) | def test_dft_bins(N=15):
  function test_mel_filterbank (line 232) | def test_mel_filterbank(N=15):

FILE: numpy_ml/tests/test_trees.py
  function clone_tree (line 16) | def clone_tree(dtree):
  function compare_trees (line 39) | def compare_trees(mine, gold):
  function test_DecisionTree (line 70) | def test_DecisionTree(N=1):
  function test_RandomForest (line 154) | def test_RandomForest(N=1):
  function test_gbdt (line 255) | def test_gbdt(N=1):

FILE: numpy_ml/tests/test_utils.py
  function test_linear_kernel (line 35) | def test_linear_kernel(N=1):
  function test_polynomial_kernel (line 54) | def test_polynomial_kernel(N=1):
  function test_radial_basis_kernel (line 76) | def test_radial_basis_kernel(N=1):
  function test_euclidean (line 105) | def test_euclidean(N=1):
  function test_hamming (line 119) | def test_hamming(N=1):
  function test_minkowski (line 133) | def test_minkowski(N=1):
  function test_chebyshev (line 148) | def test_chebyshev(N=1):
  function test_manhattan (line 162) | def test_manhattan(N=1):
  function test_ball_tree (line 181) | def test_ball_tree(N=1):
  function from_networkx (line 225) | def from_networkx(G_nx):
  function to_networkx (line 241) | def to_networkx(G):
  function test_all_paths (line 256) | def test_all_paths(N=1):
  function test_random_DAG (line 288) | def test_random_DAG(N=1):
  function test_topological_ordering (line 303) | def test_topological_ordering(N=1):
  function test_is_acyclic (line 326) | def test_is_acyclic(N=1):

FILE: numpy_ml/trees/dt.py
  class Node (line 4) | class Node:
    method __init__ (line 5) | def __init__(self, left, right, rule):
  class Leaf (line 12) | class Leaf:
    method __init__ (line 13) | def __init__(self, value):
  class DecisionTree (line 21) | class DecisionTree:
    method __init__ (line 22) | def __init__(
    method fit (line 70) | def fit(self, X, Y):
    method predict (line 87) | def predict(self, X):
    method predict_class_probs (line 104) | def predict_class_probs(self, X):
    method _grow (line 122) | def _grow(self, X, Y, cur_depth=0):
    method _segment (line 153) | def _segment(self, X, Y, feat_idxs):
    method _impurity_gain (line 173) | def _impurity_gain(self, Y, split_thresh, feat_values):
    method _traverse (line 205) | def _traverse(self, X, node, prob=False):
  function mse (line 215) | def mse(y):
  function entropy (line 222) | def entropy(y):
  function gini (line 231) | def gini(y):

FILE: numpy_ml/trees/gbdt.py
  function to_one_hot (line 7) | def to_one_hot(labels, n_classes=None):
  class GradientBoostedDecisionTree (line 18) | class GradientBoostedDecisionTree:
    method __init__ (line 19) | def __init__(
    method fit (line 94) | def fit(self, X, Y):
    method predict (line 158) | def predict(self, X):

FILE: numpy_ml/trees/losses.py
  class ClassProbEstimator (line 8) | class ClassProbEstimator:
    method fit (line 9) | def fit(self, X, y):
    method predict (line 12) | def predict(self, X):
  class MeanBaseEstimator (line 18) | class MeanBaseEstimator:
    method fit (line 19) | def fit(self, X, y):
    method predict (line 22) | def predict(self, X):
  class MSELoss (line 33) | class MSELoss:
    method __call__ (line 34) | def __call__(self, y, y_pred):
    method base_estimator (line 37) | def base_estimator(self):
    method grad (line 40) | def grad(self, y, y_pred):
    method line_search (line 43) | def line_search(self, y, y_pred, h_pred):
  class CrossEntropyLoss (line 52) | class CrossEntropyLoss:
    method __call__ (line 53) | def __call__(self, y, y_pred):
    method base_estimator (line 57) | def base_estimator(self):
    method grad (line 60) | def grad(self, y, y_pred):
    method line_search (line 64) | def line_search(self, y, y_pred, h_pred):

FILE: numpy_ml/trees/rf.py
  function bootstrap_sample (line 5) | def bootstrap_sample(X, Y):
  class RandomForest (line 11) | class RandomForest:
    method __init__ (line 12) | def __init__(
    method fit (line 44) | def fit(self, X, Y):
    method predict (line 61) | def predict(self, X):
    method _vote (line 78) | def _vote(self, predictions):

FILE: numpy_ml/utils/data_structures.py
  class PQNode (line 14) | class PQNode(object):
    method __init__ (line 15) | def __init__(self, key, val, priority, entry_id, **kwargs):
    method __repr__ (line 22) | def __repr__(self):
    method to_dict (line 26) | def to_dict(self):
    method __gt__ (line 32) | def __gt__(self, other):
    method __ge__ (line 39) | def __ge__(self, other):
    method __lt__ (line 44) | def __lt__(self, other):
    method __le__ (line 51) | def __le__(self, other):
  class PriorityQueue (line 57) | class PriorityQueue:
    method __init__ (line 58) | def __init__(self, capacity, heap_order="max"):
    method __repr__ (line 86) | def __repr__(self):
    method __len__ (line 90) | def __len__(self):
    method __iter__ (line 93) | def __iter__(self):
    method push (line 96) | def push(self, key, priority, val=None):
    method pop (line 127) | def pop(self):
    method peek (line 148) | def peek(self):
  class BallTreeNode (line 176) | class BallTreeNode:
    method __init__ (line 177) | def __init__(self, centroid=None, X=None, y=None):
    method __repr__ (line 187) | def __repr__(self):
    method to_dict (line 191) | def to_dict(self):
  class BallTree (line 197) | class BallTree:
    method __init__ (line 198) | def __init__(self, leaf_size=40, metric=None):
    method fit (line 234) | def fit(self, X, y=None):
    method _build_tree (line 259) | def _build_tree(self, X, y):
    method _split (line 274) | def _split(self, X, y=None):
    method nearest_neighbors (line 292) | def nearest_neighbors(self, k, x):
    method _knn (line 319) | def _knn(self, k, x, PQ, root):
  class DiscreteSampler (line 346) | class DiscreteSampler:
    method __init__ (line 347) | def __init__(self, probs, log=False, with_replacement=True):
    method __call__ (line 423) | def __call__(self, n_samples=1):
    method sample (line 441) | def sample(self, n_samples=1):
  class Dict (line 478) | class Dict(dict):
    method __init__ (line 479) | def __init__(self, encoder=None):
    method __setitem__ (line 495) | def __setitem__(self, key, value):
    method _encode_key (line 502) | def _encode_key(self, key):
    method __getitem__ (line 513) | def __getitem__(self, key):
    method __missing__ (line 521) | def __missing__(self, key):

FILE: numpy_ml/utils/distance_metrics.py
  function euclidean (line 4) | def euclidean(x, y):
  function manhattan (line 29) | def manhattan(x, y):
  function chebyshev (line 54) | def chebyshev(x, y):
  function minkowski (line 79) | def minkowski(x, y, p):
  function hamming (line 109) | def hamming(x, y):

FILE: numpy_ml/utils/graphs.py
  class Edge (line 12) | class Edge(object):
    method __init__ (line 13) | def __init__(self, fr, to, w=None):
    method __repr__ (line 33) | def __repr__(self):
    method weight (line 37) | def weight(self):
    method reverse (line 40) | def reverse(self):
  class Graph (line 50) | class Graph(ABC):
    method __init__ (line 51) | def __init__(self, V, E):
    method __getitem__ (line 60) | def __getitem__(self, v_i):
    method get_index (line 63) | def get_index(self, v):
    method get_vertex (line 67) | def get_vertex(self, v_i):
    method vertices (line 72) | def vertices(self):
    method indices (line 76) | def indices(self):
    method edges (line 80) | def edges(self):
    method get_neighbors (line 83) | def get_neighbors(self, v_i):
    method to_matrix (line 90) | def to_matrix(self):
    method to_adj_dict (line 98) | def to_adj_dict(self):
    method path_exists (line 105) | def path_exists(self, s_i, e_i):
    method all_paths (line 132) | def all_paths(self, s_i, e_i):
    method _build_adjacency_list (line 169) | def _build_adjacency_list(self):
  class DiGraph (line 173) | class DiGraph(Graph):
    method __init__ (line 174) | def __init__(self, V, E):
    method _build_adjacency_list (line 189) | def _build_adjacency_list(self):
    method reverse (line 196) | def reverse(self):
    method topological_ordering (line 200) | def topological_ordering(self):
    method is_acyclic (line 261) | def is_acyclic(self):
  class UndirectedGraph (line 266) | class UndirectedGraph(Graph):
    method __init__ (line 267) | def __init__(self, V, E):
    method _build_adjacency_list (line 285) | def _build_adjacency_list(self):
  function random_unweighted_graph (line 302) | def random_unweighted_graph(n_vertices, edge_prob=0.5, directed=False):
  function random_DAG (line 336) | def random_DAG(n_vertices, edge_prob=0.5):

FILE: numpy_ml/utils/kernels.py
  class KernelBase (line 7) | class KernelBase(ABC):
    method __init__ (line 8) | def __init__(self):
    method _kernel (line 14) | def _kernel(self, X, Y):
    method __call__ (line 17) | def __call__(self, X, Y=None):
    method __str__ (line 21) | def __str__(self):
    method summary (line 26) | def summary(self):
    method set_params (line 34) | def set_params(self, summary_dict):
  class LinearKernel (line 72) | class LinearKernel(KernelBase):
    method __init__ (line 73) | def __init__(self, c0=0):
    method _kernel (line 96) | def _kernel(self, X, Y=None):
  class PolynomialKernel (line 119) | class PolynomialKernel(KernelBase):
    method __init__ (line 120) | def __init__(self, d=3, gamma=None, c0=1):
    method _kernel (line 159) | def _kernel(self, X, Y=None):
  class RBFKernel (line 184) | class RBFKernel(KernelBase):
    method __init__ (line 185) | def __init__(self, sigma=None):
    method _kernel (line 217) | def _kernel(self, X, Y=None):
  class KernelInitializer (line 241) | class KernelInitializer(object):
    method __init__ (line 242) | def __init__(self, param=None):
    method __call__ (line 254) | def __call__(self):
    method init_from_str (line 266) | def init_from_str(self):
    method init_from_dict (line 281) | def init_from_dict(self):
  function kernel_checks (line 299) | def kernel_checks(X, Y):
  function pairwise_l2_distances (line 310) | def pairwise_l2_distances(X, Y):

FILE: numpy_ml/utils/misc.py
  function logsumexp (line 5) | def logsumexp(log_probs, axis=None):
  function log_gaussian_pdf (line 16) | def log_gaussian_pdf(x_i, mu, sigma):

FILE: numpy_ml/utils/testing.py
  function is_symmetric (line 11) | def is_symmetric(X):
  function is_symmetric_positive_definite (line 16) | def is_symmetric_positive_definite(X):
  function is_stochastic (line 30) | def is_stochastic(X):
  function is_number (line 38) | def is_number(a):
  function is_one_hot (line 43) | def is_one_hot(x):
  function is_binary (line 51) | def is_binary(x):
  function random_one_hot_matrix (line 63) | def random_one_hot_matrix(n_examples, n_classes):
  function random_stochastic_matrix (line 70) | def random_stochastic_matrix(n_examples, n_classes):
  function random_tensor (line 77) | def random_tensor(shape, standardize=False):
  function random_binary_tensor (line 91) | def random_binary_tensor(shape, sparsity=0.5):
  function random_paragraph (line 99) | def random_paragraph(n_words, vocab=None):
  class DependencyWarning (line 142) | class DependencyWarning(RuntimeWarning):

FILE: numpy_ml/utils/windows.py
  function blackman_harris (line 4) | def blackman_harris(window_len, symmetric=False):
  function hamming (line 43) | def hamming(window_len, symmetric=False):
  function hann (line 76) | def hann(window_len, symmetric=False):
  function generalized_cosine (line 108) | def generalized_cosine(window_len, coefs, symmetric=False):
  class WindowInitializer (line 145) | class WindowInitializer:
    method __call__ (line 146) | def __call__(self, window):

Download .json

Condensed preview — 194 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,379K chars).

[
  {
    "path": ".github/ISSUE_TEMPLATE/a--bug-performance-issue.md",
    "chars": 656,
    "preview": "---\nname: Bug/Performance Issue\nabout: Use this template for reporting a bug or a performance issue.\nlabels: bugfix\n---\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "chars": 983,
    "preview": "### All Submissions\n\n* [ ] Is the code you are submitting your own work?\n* [ ] Have you followed the [contributing guide"
  },
  {
    "path": ".gitignore",
    "chars": 2068,
    "preview": "### OSX ###\n# General\n.DS_Store\n.AppleDouble\n.LSOverride\n\n# Icon must end with two \\r\nIcon\n\n# Thumbnails\n._*\n\n# Files th"
  },
  {
    "path": ".readthedocs.yml",
    "chars": 560,
    "preview": "# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Requir"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 3358,
    "preview": "# NumPy-ML Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontrib"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 2357,
    "preview": "## Contributing\n\nThank you for contributing to numpy-ml!\n\n| <p align=\"center\">⚠️ ⚠️ All PRs should reflect earnest attem"
  },
  {
    "path": "LICENSE",
    "chars": 35149,
    "preview": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
  },
  {
    "path": "MANIFEST.in",
    "chars": 86,
    "preview": "include README.md\ninclude requirements*.txt\ninclude docs/*.rst\ninclude docs/img/*.png\n"
  },
  {
    "path": "README.md",
    "chars": 6162,
    "preview": "# numpy-ml\nEver wish you had an inefficient but somewhat legible collection of machine\nlearning algorithms implemented e"
  },
  {
    "path": "docs/Makefile",
    "chars": 581,
    "preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHI"
  },
  {
    "path": "docs/README.md",
    "chars": 231,
    "preview": "To build the documentation locally, [install sphinx](http://www.sphinx-doc.org/en/master/usage/installation.html), cd in"
  },
  {
    "path": "docs/conf.py",
    "chars": 10786,
    "preview": "# -*- coding: utf-8 -*-\n#\n# Configuration file for the Sphinx documentation builder.\n#\n# This file does only contain a s"
  },
  {
    "path": "docs/index.rst",
    "chars": 1438,
    "preview": "Welcome to numpy-ml\n===================\n`numpy-ml`_ is a growing collection of machine learning models, algorithms, and\n"
  },
  {
    "path": "docs/make.bat",
    "chars": 787,
    "preview": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sp"
  },
  {
    "path": "docs/numpy_ml.bandits.bandits.rst",
    "chars": 1128,
    "preview": "Bandit Environments\n===================\n\n``Bandit``\n-----------\n.. autoclass:: numpy_ml.bandits.bandits.Bandit\n    :memb"
  },
  {
    "path": "docs/numpy_ml.bandits.policies.rst",
    "chars": 793,
    "preview": "Policies\n=========\n\n``BanditPolicyBase``\n--------------------\n.. autoclass:: numpy_ml.bandits.policies.BanditPolicyBase\n"
  },
  {
    "path": "docs/numpy_ml.bandits.rst",
    "chars": 158,
    "preview": "Multi-armed bandits\n###################\n\n.. toctree::\n   :maxdepth: 3\n\n   numpy_ml.bandits.bandits\n\n   numpy_ml.bandits."
  },
  {
    "path": "docs/numpy_ml.bandits.trainer.rst",
    "chars": 166,
    "preview": "Trainer\n=======\n\n``BanditTrainer``\n------------------\n.. autoclass:: numpy_ml.bandits.trainer.BanditTrainer\n    :members"
  },
  {
    "path": "docs/numpy_ml.factorization.factors.rst",
    "chars": 207,
    "preview": "``VanillaALS``\n--------------\n.. autoclass:: numpy_ml.factorization.VanillaALS\n    :members:\n    :undoc-members:\n\n``NMF`"
  },
  {
    "path": "docs/numpy_ml.factorization.rst",
    "chars": 107,
    "preview": "Matrix factorization\n####################\n\n.. toctree::\n   :maxdepth: 3\n\n   numpy_ml.factorization.factors\n"
  },
  {
    "path": "docs/numpy_ml.gmm.gmm.rst",
    "chars": 98,
    "preview": "``GMM``\n-------\n\n.. autoclass:: numpy_ml.gmm.GMM\n\t:members:\n\t:undoc-members:\n\t:inherited-members:\n"
  },
  {
    "path": "docs/numpy_ml.gmm.rst",
    "chars": 2710,
    "preview": "#######################\nGaussian mixture models\n#######################\n\nA `Gaussian mixture model`_ (GMM) is a latent v"
  },
  {
    "path": "docs/numpy_ml.hmm.MultinomialHMM.rst",
    "chars": 131,
    "preview": "``MultinomialHMM``\n------------------\n\n.. autoclass:: numpy_ml.hmm.MultinomialHMM\n\t:members:\n\t:undoc-members:\n\t:inherite"
  },
  {
    "path": "docs/numpy_ml.hmm.rst",
    "chars": 2290,
    "preview": "####################\nHidden Markov models\n####################\n\nA `hidden Markov model`_ (HMM) is a generative model for"
  },
  {
    "path": "docs/numpy_ml.lda.lda.rst",
    "chars": 107,
    "preview": "``LDA``\n=======\n\n.. autoclass:: numpy_ml.lda.LDA\n    :members:\n    :undoc-members:\n    :inherited-members:\n"
  },
  {
    "path": "docs/numpy_ml.lda.rst",
    "chars": 3356,
    "preview": "###########################\nLatent Dirichlet allocation\n###########################\n\n`Latent Dirichlet allocation`_ (LDA"
  },
  {
    "path": "docs/numpy_ml.lda.smoothed_lda.rst",
    "chars": 154,
    "preview": "``SmoothedLDA``\n===============\n\n.. autoclass:: numpy_ml.lda.SmoothedLDA\n    :members:\n    :undoc-members:\n    :inherite"
  },
  {
    "path": "docs/numpy_ml.linear_models.lm.rst",
    "chars": 1232,
    "preview": "``LinearRegression``\n--------------------\n\n.. autoclass:: numpy_ml.linear_models.LinearRegression\n\t:members:\n\t:undoc-mem"
  },
  {
    "path": "docs/numpy_ml.linear_models.rst",
    "chars": 8485,
    "preview": "Linear models\n#############\n\n.. raw:: html\n\n   <h2>Ordinary and Weighted Linear Least Squares</h2>\n\nIn weighted linear l"
  },
  {
    "path": "docs/numpy_ml.neural_nets.activations.rst",
    "chars": 1790,
    "preview": "Activations\n===========\n\nPopular (and some not-so-popular) activation functions for use within arbitrary\nneural networks"
  },
  {
    "path": "docs/numpy_ml.neural_nets.initializers.rst",
    "chars": 745,
    "preview": "Initializers\n=============\n\n``ActivationInitializer``\n--------------------------\n.. autoclass:: numpy_ml.neural_nets.ini"
  },
  {
    "path": "docs/numpy_ml.neural_nets.layers.rst",
    "chars": 3070,
    "preview": "Layers\n======\n\n``LayerBase``\n-------------\n.. autoclass:: numpy_ml.neural_nets.layers.layers.LayerBase\n    :members:\n   "
  },
  {
    "path": "docs/numpy_ml.neural_nets.losses.rst",
    "chars": 742,
    "preview": "Loss functions\n==============\n\n``CrossEntropy``\n----------------\n.. autoclass:: numpy_ml.neural_nets.losses.CrossEntropy"
  },
  {
    "path": "docs/numpy_ml.neural_nets.models.rst",
    "chars": 448,
    "preview": "Full networks\n==============\n\n``WGAN_GP``\n----------\n.. autoclass:: numpy_ml.neural_nets.models.WGAN_GP\n    :members:\n  "
  },
  {
    "path": "docs/numpy_ml.neural_nets.modules.rst",
    "chars": 820,
    "preview": "Modules\n========\n\n``BidirectionalLSTM``\n---------------------\n.. autoclass:: numpy_ml.neural_nets.modules.BidirectionalL"
  },
  {
    "path": "docs/numpy_ml.neural_nets.optimizers.rst",
    "chars": 1146,
    "preview": "Optimizers\n===========\nPopular gradient-based strategies for optimizing parameters in neural networks.\n\nFor a discussion"
  },
  {
    "path": "docs/numpy_ml.neural_nets.rst",
    "chars": 14964,
    "preview": "Neural networks\n###############\nThe neural network module includes common building blocks for implementing\nmodern `deep "
  },
  {
    "path": "docs/numpy_ml.neural_nets.schedulers.rst",
    "chars": 722,
    "preview": "Learning rate schedulers\n=========================\n\n``ConstantScheduler``\n---------------------\n.. autoclass:: numpy_ml."
  },
  {
    "path": "docs/numpy_ml.neural_nets.utils.rst",
    "chars": 1827,
    "preview": "Utilities\n==========\n\n``minibatch``\n-------------\n.. autofunction:: numpy_ml.neural_nets.utils.minibatch\n\n``calc_pad_dim"
  },
  {
    "path": "docs/numpy_ml.neural_nets.wrappers.rst",
    "chars": 311,
    "preview": "Wrappers\n=========\n\n``WrapperBase``\n---------------\n.. autoclass:: numpy_ml.neural_nets.wrappers.wrappers.WrapperBase\n  "
  },
  {
    "path": "docs/numpy_ml.ngram.additive.rst",
    "chars": 130,
    "preview": "``AdditiveNGram``\n-----------------\n\n.. autoclass:: numpy_ml.ngram.AdditiveNGram\n\t:members:\n\t:undoc-members:\n\t:inherited"
  },
  {
    "path": "docs/numpy_ml.ngram.goodturing.rst",
    "chars": 136,
    "preview": "``GoodTuringNGram``\n-------------------\n\n.. autoclass:: numpy_ml.ngram.GoodTuringNGram\n\t:members:\n\t:undoc-members:\n\t:inh"
  },
  {
    "path": "docs/numpy_ml.ngram.mle.rst",
    "chars": 122,
    "preview": "``MLENGram``\n------------\n\n.. autoclass:: numpy_ml.ngram.MLENGram\n        :members:\n\t:undoc-members:\n\t:inherited-members"
  },
  {
    "path": "docs/numpy_ml.ngram.rst",
    "chars": 3038,
    "preview": "#######################\nN-gram smoothing models\n#######################\n\nWhen dealing with `n-gram`_ models, smoothing r"
  },
  {
    "path": "docs/numpy_ml.nonparametric.gp.rst",
    "chars": 136,
    "preview": "``GPRegression``\n#################\n\n.. autoclass:: numpy_ml.nonparametric.GPRegression\n\t:members:\n\t:undoc-members:\n\t:inh"
  },
  {
    "path": "docs/numpy_ml.nonparametric.kernel_regression.rst",
    "chars": 148,
    "preview": "``KernelRegression``\n#####################\n\n.. autoclass:: numpy_ml.nonparametric.KernelRegression\n\t:members:\n\t:undoc-me"
  },
  {
    "path": "docs/numpy_ml.nonparametric.knn.rst",
    "chars": 108,
    "preview": "``KNN``\n#######\n\n.. autoclass:: numpy_ml.nonparametric.KNN\n\t:members:\n\t:undoc-members:\n\t:inherited-members:\n"
  },
  {
    "path": "docs/numpy_ml.nonparametric.rst",
    "chars": 4879,
    "preview": "Nonparametric models\n####################\n\n.. raw:: html\n\n   <h2>K-Nearest Neighbors</h2>\n\nThe `k-nearest neighbors`_ (K"
  },
  {
    "path": "docs/numpy_ml.preprocessing.dsp.rst",
    "chars": 1715,
    "preview": "Digital signal processing\n#########################\n\n``DCT``\n-------\n.. autofunction:: numpy_ml.preprocessing.dsp.DCT\n\n`"
  },
  {
    "path": "docs/numpy_ml.preprocessing.general.rst",
    "chars": 552,
    "preview": "General\n#######\n\n``FeatureHasher``\n-----------------\n\n.. autoclass:: numpy_ml.preprocessing.general.FeatureHasher\n\t:memb"
  },
  {
    "path": "docs/numpy_ml.preprocessing.nlp.rst",
    "chars": 1577,
    "preview": "Natural language processing\n###########################\n\n``BytePairEncoder``\n-------------------\n\n.. autoclass:: numpy_m"
  },
  {
    "path": "docs/numpy_ml.preprocessing.rst",
    "chars": 155,
    "preview": "Preprocessing\n#############\n\n.. toctree::\n   :maxdepth: 3\n\n   numpy_ml.preprocessing.general\n\n   numpy_ml.preprocessing."
  },
  {
    "path": "docs/numpy_ml.rl_models.agents.rst",
    "chars": 2455,
    "preview": "Agents\n======\n\n``CrossEntropyAgent``\n---------------------\n.. autoclass:: numpy_ml.rl_models.agents.CrossEntropyAgent\n  "
  },
  {
    "path": "docs/numpy_ml.rl_models.rl_utils.rst",
    "chars": 103,
    "preview": "Utilities\n=========\n\n.. automodule:: numpy_ml.rl_models.rl_utils\n    :members:\n    :inherited-members:\n"
  },
  {
    "path": "docs/numpy_ml.rl_models.rst",
    "chars": 169,
    "preview": "Reinforcement learning\n######################\n\n.. toctree::\n   :maxdepth: 3\n\n   numpy_ml.rl_models.agents\n\n   numpy_ml.r"
  },
  {
    "path": "docs/numpy_ml.rl_models.trainer.rst",
    "chars": 145,
    "preview": "Training\n========\n\n``Trainer``\n-----------\n\n.. automodule:: numpy_ml.rl_models.trainer\n    :members:\n    :undoc-members:"
  },
  {
    "path": "docs/numpy_ml.trees.dt.rst",
    "chars": 144,
    "preview": "################\n``DecisionTree``\n################\n\n.. autoclass:: numpy_ml.trees.DecisionTree\n\t:members:\n\t:undoc-member"
  },
  {
    "path": "docs/numpy_ml.trees.gbdt.rst",
    "chars": 172,
    "preview": "``GradientBoostedDecisionTree``\n###############################\n\n.. autoclass:: numpy_ml.trees.GradientBoostedDecisionTr"
  },
  {
    "path": "docs/numpy_ml.trees.losses.rst",
    "chars": 186,
    "preview": "#########################\nLosses (``trees.losses``)\n#########################\n\n.. automodule:: numpy_ml.trees.losses\n\t:m"
  },
  {
    "path": "docs/numpy_ml.trees.rf.rst",
    "chars": 127,
    "preview": "``RandomForest``\n################\n\n.. autoclass:: numpy_ml.trees.RandomForest\n\t:members:\n\t:undoc-members:\n\t:inherited-me"
  },
  {
    "path": "docs/numpy_ml.trees.rst",
    "chars": 6664,
    "preview": "Tree-based models\n#################\n.. raw:: html\n\n   <h2>Decision Trees</h2>\n\n`Decision trees`_ [1]_ are popular nonpar"
  },
  {
    "path": "docs/numpy_ml.utils.data_structures.rst",
    "chars": 724,
    "preview": "Data structures\n================\n\n``BallTree``\n------------\n\n.. autoclass:: numpy_ml.utils.data_structures.BallTree\n\t:me"
  },
  {
    "path": "docs/numpy_ml.utils.distance_metrics.rst",
    "chars": 509,
    "preview": "Distance metrics\n================\n\nCommon distance functions.\n\n``euclidean``\n---------------\n.. autofunction:: numpy_ml."
  },
  {
    "path": "docs/numpy_ml.utils.graphs.rst",
    "chars": 738,
    "preview": "Graphs\n======\n\n``Graph``\n---------\n.. autoclass:: numpy_ml.utils.graphs.Graph\n    :members:\n    :undoc-members:\n    :inh"
  },
  {
    "path": "docs/numpy_ml.utils.kernels.rst",
    "chars": 597,
    "preview": "Kernels\n=======\n\nA collection of common kernel / similarity functions. All kernels are\ncontinuous, bounded, and symmetri"
  },
  {
    "path": "docs/numpy_ml.utils.rst",
    "chars": 228,
    "preview": "Utilities\n#########\n\n.. toctree::\n   :maxdepth: 3\n\n   numpy_ml.utils.data_structures\n\n   numpy_ml.utils.distance_metrics"
  },
  {
    "path": "docs/numpy_ml.utils.testing.rst",
    "chars": 216,
    "preview": "Testing\n-------\nCommon helper functions for testing the ML algorithms in the rest of the repo.\n\n.. automodule:: numpy_ml"
  },
  {
    "path": "docs/numpy_ml.utils.windows.rst",
    "chars": 545,
    "preview": "Window functions\n================\nIn digital signal processing, windowing functions are useful to counteract the\nassumpt"
  },
  {
    "path": "docs/requirements.txt",
    "chars": 88,
    "preview": "numpy\nscipy\n\n# all this is for the dang tests\nmatplotlib\nseaborn\npandas\nsklearn\nhuffman\n"
  },
  {
    "path": "numpy_ml/README.md",
    "chars": 4607,
    "preview": "# Models\nThis repo includes code for the following models:\n\n1. **Gaussian mixture model**\n    - EM training\n\n2. **Hidden"
  },
  {
    "path": "numpy_ml/__init__.py",
    "chars": 371,
    "preview": "# noqa\n\"\"\"Common ML and ML-adjacent algorithms implemented in NumPy\"\"\"\n\nfrom . import utils\nfrom . import preprocessing\n"
  },
  {
    "path": "numpy_ml/bandits/README.md",
    "chars": 1037,
    "preview": "# Bandits\nThe `bandit.py` module includes several simple multi-arm bandit\nenvironments.\n\nThe `policies.py` module implem"
  },
  {
    "path": "numpy_ml/bandits/__init__.py",
    "chars": 68,
    "preview": "from .bandits import *\nfrom . import policies\nfrom . import trainer\n"
  },
  {
    "path": "numpy_ml/bandits/bandits.py",
    "chars": 17473,
    "preview": "\"\"\"A module containing different variations on multi-armed bandit environments.\"\"\"\n\nfrom abc import ABC, abstractmethod\n"
  },
  {
    "path": "numpy_ml/bandits/policies.py",
    "chars": 17224,
    "preview": "\"\"\"A module containing exploration policies for various multi-armed bandit problems.\"\"\"\n\nfrom abc import ABC, abstractme"
  },
  {
    "path": "numpy_ml/bandits/trainer.py",
    "chars": 13974,
    "preview": "\"\"\"A trainer/runner object for executing and comparing MAB policies.\"\"\"\n\nimport warnings\nimport os.path as op\nfrom colle"
  },
  {
    "path": "numpy_ml/factorization/README.md",
    "chars": 342,
    "preview": "# Factors\nThe `factors.py` module includes common approximate matrix-factorization\nalgorithms including:\n\n- Regularized "
  },
  {
    "path": "numpy_ml/factorization/__init__.py",
    "chars": 78,
    "preview": "\"\"\"Algorithms for approximate matrix factorization\"\"\"\n\nfrom .factors import *\n"
  },
  {
    "path": "numpy_ml/factorization/factors.py",
    "chars": 13123,
    "preview": "\"\"\"Algorithms for approximate matrix factorization\"\"\"\n\nfrom copy import deepcopy\n\nimport numpy as np\n\n\nclass VanillaALS:"
  },
  {
    "path": "numpy_ml/gmm/README.md",
    "chars": 401,
    "preview": "# Gaussian Mixture Models\nThe `gmm.py` module implements the standard (ie., non-Bayesian) [Gaussian mixture model](https"
  },
  {
    "path": "numpy_ml/gmm/__init__.py",
    "chars": 19,
    "preview": "from .gmm import *\n"
  },
  {
    "path": "numpy_ml/gmm/gmm.py",
    "chars": 7874,
    "preview": "\"\"\"A Gaussian mixture model class\"\"\"\nimport numpy as np\n\nfrom numpy_ml.utils.misc import logsumexp, log_gaussian_pdf\n\n\nc"
  },
  {
    "path": "numpy_ml/hmm/README.md",
    "chars": 466,
    "preview": "# Hidden Markov model\nThe `hmm.py` module implements a standard (i.e., non-Bayesian) [Hidden Markov\nmodel](https://en.wi"
  },
  {
    "path": "numpy_ml/hmm/__init__.py",
    "chars": 19,
    "preview": "from .hmm import *\n"
  },
  {
    "path": "numpy_ml/hmm/hmm.py",
    "chars": 25501,
    "preview": "\"\"\"Hidden Markov model module\"\"\"\n\nimport numpy as np\nfrom numpy_ml.utils.misc import logsumexp\n\n\nclass MultinomialHMM:\n "
  },
  {
    "path": "numpy_ml/lda/README.md",
    "chars": 635,
    "preview": "# Latent Dirichlet allocation\nThe `lda.py` module implements:\n\n1. [Standard (ie., non-Bayesian) latent Dirichlet\n   allo"
  },
  {
    "path": "numpy_ml/lda/__init__.py",
    "chars": 47,
    "preview": "from .lda import *\nfrom .lda_smoothed import *\n"
  },
  {
    "path": "numpy_ml/lda/lda.py",
    "chars": 7864,
    "preview": "import numpy as np\nfrom scipy.special import digamma, polygamma, gammaln\n\n\nclass LDA(object):\n    def __init__(self, T=1"
  },
  {
    "path": "numpy_ml/lda/lda_smoothed.py",
    "chars": 7258,
    "preview": "import numpy as np\n\n\nclass SmoothedLDA(object):\n    def __init__(self, T, **kwargs):\n        \"\"\"\n        A smoothed LDA "
  },
  {
    "path": "numpy_ml/linear_models/README.md",
    "chars": 1529,
    "preview": "# Linear Models\nThe `linear_models` module includes:\n\n1. [OLS linear regression](https://en.wikipedia.org/wiki/Ordinary_"
  },
  {
    "path": "numpy_ml/linear_models/__init__.py",
    "chars": 386,
    "preview": "\"\"\"A module containing assorted linear models.\"\"\"\n\nfrom .ridge import RidgeRegression\nfrom .glm import GeneralizedLinear"
  },
  {
    "path": "numpy_ml/linear_models/bayesian_regression.py",
    "chars": 11223,
    "preview": "\"\"\"A module of Bayesian linear regression models.\"\"\"\nimport numpy as np\nimport scipy.stats as stats\n\nfrom numpy_ml.utils"
  },
  {
    "path": "numpy_ml/linear_models/glm.py",
    "chars": 7867,
    "preview": "\"\"\"A module for the generalized linear model.\"\"\"\nimport numpy as np\n\nfrom numpy_ml.linear_models.linear_regression impor"
  },
  {
    "path": "numpy_ml/linear_models/linear_regression.py",
    "chars": 9560,
    "preview": "\"\"\"Linear regression module.\"\"\"\n\nimport numpy as np\n\n\nclass LinearRegression:\n    def __init__(self, fit_intercept=True)"
  },
  {
    "path": "numpy_ml/linear_models/logistic.py",
    "chars": 6471,
    "preview": "\"\"\"Logistic regression module\"\"\"\nimport numpy as np\n\n\nclass LogisticRegression:\n    def __init__(self, penalty=\"l2\", gam"
  },
  {
    "path": "numpy_ml/linear_models/naive_bayes.py",
    "chars": 7827,
    "preview": "\"\"\"A module for naive Bayes classifiers\"\"\"\nimport numpy as np\n\n\nclass GaussianNBClassifier:\n    def __init__(self, eps=1"
  },
  {
    "path": "numpy_ml/linear_models/ridge.py",
    "chars": 3603,
    "preview": "\"\"\"Ridge regression module\"\"\"\n\nimport numpy as np\n\n\nclass RidgeRegression:\n    def __init__(self, alpha=1, fit_intercept"
  },
  {
    "path": "numpy_ml/neural_nets/README.md",
    "chars": 6031,
    "preview": "# Neural network models\nThis module implements building-blocks for larger neural network models in the\nKeras-style. This"
  },
  {
    "path": "numpy_ml/neural_nets/__init__.py",
    "chars": 304,
    "preview": "\"\"\"A module of basic building blcoks for constructing neural networks\"\"\"\nfrom . import utils\nfrom . import losses\nfrom ."
  },
  {
    "path": "numpy_ml/neural_nets/activations/README.md",
    "chars": 782,
    "preview": "# Activation Functions\nThe `activations` module implements several common activation functions:\n\n- Rectified linear unit"
  },
  {
    "path": "numpy_ml/neural_nets/activations/__init__.py",
    "chars": 27,
    "preview": "from .activations import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/activations/activations.py",
    "chars": 21641,
    "preview": "\"\"\"A collection of activation function objects for building neural networks\"\"\"\nfrom math import erf\nfrom abc import ABC,"
  },
  {
    "path": "numpy_ml/neural_nets/initializers/README.md",
    "chars": 202,
    "preview": "# Initializers\nThe `initializers.py` module contains objects for initializing optimizers,\nactivation functions, weight i"
  },
  {
    "path": "numpy_ml/neural_nets/initializers/__init__.py",
    "chars": 28,
    "preview": "from .initializers import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/initializers/initializers.py",
    "chars": 10074,
    "preview": "\"\"\"A module containing objects to instantiate various neural network components.\"\"\"\nimport re\nfrom functools import part"
  },
  {
    "path": "numpy_ml/neural_nets/layers/README.md",
    "chars": 1590,
    "preview": "# Layers\nThe `layers.py` module implements common layers / layer-wise operations that can\nbe composed to create larger n"
  },
  {
    "path": "numpy_ml/neural_nets/layers/__init__.py",
    "chars": 22,
    "preview": "from .layers import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/layers/layers.py",
    "chars": 161674,
    "preview": "\"\"\"A collection of composable layer objects for building neural networks\"\"\"\nfrom abc import ABC, abstractmethod\n\nimport "
  },
  {
    "path": "numpy_ml/neural_nets/losses/README.md",
    "chars": 543,
    "preview": "# Losses\n\nThe `losses.py` module implements several common loss functions, including:\n\n- Squared error\n- Cross-entropy\n-"
  },
  {
    "path": "numpy_ml/neural_nets/losses/__init__.py",
    "chars": 155,
    "preview": "\"\"\"\nCommon neural network loss functions.\n\nThis module implements loss objects that can be used during neural network\ntr"
  },
  {
    "path": "numpy_ml/neural_nets/losses/losses.py",
    "chars": 33746,
    "preview": "from abc import ABC, abstractmethod\n\nimport numpy as np\n\nfrom ...utils.testing import is_binary, is_stochastic\nfrom ..in"
  },
  {
    "path": "numpy_ml/neural_nets/models/README.md",
    "chars": 692,
    "preview": "# Models\n\nThe models module implements popular full neural networks. It includes:\n\n- `vae.py`: A Bernoulli variational a"
  },
  {
    "path": "numpy_ml/neural_nets/models/__init__.py",
    "chars": 61,
    "preview": "from .vae import *\nfrom .wgan_gp import *\nfrom .w2v import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/models/vae.py",
    "chars": 16882,
    "preview": "from time import time\nfrom collections import OrderedDict\n\nimport numpy as np\n\nfrom ..losses import VAELoss\nfrom ..utils"
  },
  {
    "path": "numpy_ml/neural_nets/models/w2v.py",
    "chars": 18348,
    "preview": "from time import time\n\nimport numpy as np\n\nfrom ..layers import Embedding\nfrom ..losses import NCELoss\n\nfrom ...preproce"
  },
  {
    "path": "numpy_ml/neural_nets/models/wgan_gp.py",
    "chars": 18339,
    "preview": "from time import time\nfrom collections import OrderedDict\n\nimport numpy as np\n\nfrom ..utils import minibatch\nfrom ..laye"
  },
  {
    "path": "numpy_ml/neural_nets/modules/README.md",
    "chars": 764,
    "preview": "# Modules\n\nThe `modules.py` module implements common multi-layer blocks that appear across\nmany modern deep networks. It"
  },
  {
    "path": "numpy_ml/neural_nets/modules/__init__.py",
    "chars": 23,
    "preview": "from .modules import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/modules/modules.py",
    "chars": 53285,
    "preview": "from abc import ABC, abstractmethod\n\nimport re\nimport numpy as np\n\nfrom ..wrappers import Dropout\nfrom ..utils import ca"
  },
  {
    "path": "numpy_ml/neural_nets/optimizers/README.md",
    "chars": 540,
    "preview": "# Optimizers\n\nThe `optimizers.py` module implements common modifications to stochastic gradient descent. It includes:\n\n-"
  },
  {
    "path": "numpy_ml/neural_nets/optimizers/__init__.py",
    "chars": 26,
    "preview": "from .optimizers import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/optimizers/optimizers.py",
    "chars": 17533,
    "preview": "from copy import deepcopy\nfrom abc import ABC, abstractmethod\n\nimport numpy as np\nfrom numpy.linalg import norm\n\n\nclass "
  },
  {
    "path": "numpy_ml/neural_nets/schedulers/README.md",
    "chars": 436,
    "preview": "# Learning Rate Schedulers\nThe `schedulers` module implements several common strategies for learning rate\ndecay:\n\n- Cons"
  },
  {
    "path": "numpy_ml/neural_nets/schedulers/__init__.py",
    "chars": 26,
    "preview": "from .schedulers import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/schedulers/schedulers.py",
    "chars": 12389,
    "preview": "from copy import deepcopy\nfrom abc import ABC, abstractmethod\n\nimport numpy as np\n\nfrom math import erf\n\n\ndef gaussian_c"
  },
  {
    "path": "numpy_ml/neural_nets/utils/README.md",
    "chars": 322,
    "preview": "# Utilities\n\nThe `utils.py` module implements common, neural network-specific helper\nfunctions, primarily for dealing wi"
  },
  {
    "path": "numpy_ml/neural_nets/utils/__init__.py",
    "chars": 196,
    "preview": "\"\"\"\nCommon neural network-specific helper functions.\n\nThe ``neural_nets.utils` module contains neural network-specific h"
  },
  {
    "path": "numpy_ml/neural_nets/utils/utils.py",
    "chars": 36459,
    "preview": "import numpy as np\n\n#######################################################################\n#                           "
  },
  {
    "path": "numpy_ml/neural_nets/wrappers/README.md",
    "chars": 208,
    "preview": "# Wrappers\n\nThe `wrappers.py` module implements wrappers for the layers in `layers.py`. It\nincludes\n- Dropout ([Srivasta"
  },
  {
    "path": "numpy_ml/neural_nets/wrappers/__init__.py",
    "chars": 24,
    "preview": "from .wrappers import *\n"
  },
  {
    "path": "numpy_ml/neural_nets/wrappers/wrappers.py",
    "chars": 8675,
    "preview": "\"\"\"\nA collection of objects thats can wrap / otherwise modify arbitrary neural\nnetwork layers.\n\"\"\"\n\nfrom abc import ABC,"
  },
  {
    "path": "numpy_ml/ngram/README.md",
    "chars": 679,
    "preview": "# N-Gram Sequence Models\nThe `ngram.py` module implements [n-gram models](https://en.wikipedia.org/wiki/N-gram) with dif"
  },
  {
    "path": "numpy_ml/ngram/__init__.py",
    "chars": 21,
    "preview": "from .ngram import *\n"
  },
  {
    "path": "numpy_ml/ngram/ngram.py",
    "chars": 24146,
    "preview": "\"\"\"A module for different N-gram smoothing models\"\"\"\nimport textwrap\nfrom abc import ABC, abstractmethod\nfrom collection"
  },
  {
    "path": "numpy_ml/nonparametric/README.md",
    "chars": 1151,
    "preview": "# Nonparametric Models\nThe nonparametric module implements several popular nonparameteric regression\nand classification "
  },
  {
    "path": "numpy_ml/nonparametric/__init__.py",
    "chars": 321,
    "preview": "\"\"\"\nPopular nonparameteric regression and classification models.\n\nThe nonparametric module contains an assortment of non"
  },
  {
    "path": "numpy_ml/nonparametric/gp.py",
    "chars": 9325,
    "preview": "import warnings\nimport numpy as np\nfrom numpy.linalg import slogdet, inv\n\ntry:\n    _SCIPY = True\n    from scipy.stats im"
  },
  {
    "path": "numpy_ml/nonparametric/kernel_regression.py",
    "chars": 2212,
    "preview": "from ..utils.kernels import KernelInitializer\n\n\nclass KernelRegression:\n    def __init__(self, kernel=None):\n        \"\"\""
  },
  {
    "path": "numpy_ml/nonparametric/knn.py",
    "chars": 3864,
    "preview": "\"\"\"A k-Nearest Neighbors (KNN) model for both classiciation and regression.\"\"\"\nfrom collections import Counter\n\nimport n"
  },
  {
    "path": "numpy_ml/plots/bandit_plots.py",
    "chars": 4820,
    "preview": "\"\"\"Miscellaneous plots for multi-arm bandit validation\"\"\"\n\nfrom collections import namedtuple\n\nimport numpy as np\n\nfrom "
  },
  {
    "path": "numpy_ml/plots/gmm_plots.py",
    "chars": 3209,
    "preview": "# flake8: noqa\nimport numpy as np\nfrom sklearn.datasets.samples_generator import make_blobs\n\nfrom scipy.stats import mul"
  },
  {
    "path": "numpy_ml/plots/hmm_plots.py",
    "chars": 5013,
    "preview": "# flake8: noqa\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport seaborn as sns\n\n# https://seaborn.pydata."
  },
  {
    "path": "numpy_ml/plots/lda_plots.py",
    "chars": 2482,
    "preview": "# flake8: noqa\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# https://seaborn.pydata.org/ge"
  },
  {
    "path": "numpy_ml/plots/lm_plots.py",
    "chars": 9793,
    "preview": "# flake8: noqa\nimport numpy as np\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets.samples_ge"
  },
  {
    "path": "numpy_ml/plots/ngram_plots.py",
    "chars": 3953,
    "preview": "# flake8: noqa\nimport numpy as np\n\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# https://seaborn.pydata.org/g"
  },
  {
    "path": "numpy_ml/plots/nn_activations_plots.py",
    "chars": 1668,
    "preview": "# flake8: noqa\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# https://seaborn.pydata.org/ge"
  },
  {
    "path": "numpy_ml/plots/nn_schedulers_plots.py",
    "chars": 5421,
    "preview": "# flake8: noqa\n\nimport time\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# https://seaborn."
  },
  {
    "path": "numpy_ml/plots/nonparametric_plots.py",
    "chars": 8317,
    "preview": "# flake8: noqa\nimport numpy as np\n\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# https://seaborn.pydata.org/g"
  },
  {
    "path": "numpy_ml/plots/rl_plots.py",
    "chars": 2867,
    "preview": "# flake8: noqa\nimport gym\n\nfrom numpy_ml.rl_models.trainer import Trainer\nfrom numpy_ml.rl_models.agents import (\n    Cr"
  },
  {
    "path": "numpy_ml/plots/trees_plots.py",
    "chars": 5651,
    "preview": "# flake8: noqa\nimport numpy as np\n\nfrom sklearn.metrics import accuracy_score, mean_squared_error\nfrom sklearn.datasets "
  },
  {
    "path": "numpy_ml/preprocessing/README.md",
    "chars": 1445,
    "preview": "# Preprocessing\nThe preprocessing module implements common data preprocessing routines.\n\n- `nlp.py`: Routines and object"
  },
  {
    "path": "numpy_ml/preprocessing/__init__.py",
    "chars": 58,
    "preview": "from . import general\nfrom . import nlp\nfrom . import dsp\n"
  },
  {
    "path": "numpy_ml/preprocessing/dsp.py",
    "chars": 29314,
    "preview": "import numpy as np\nfrom numpy.lib.stride_tricks import as_strided\n\nfrom ..utils.windows import WindowInitializer\n\n######"
  },
  {
    "path": "numpy_ml/preprocessing/general.py",
    "chars": 12952,
    "preview": "import json\nimport hashlib\nimport warnings\n\nimport numpy as np\n\ntry:\n    from scipy.sparse import csr_matrix\n\n    _SCIPY"
  },
  {
    "path": "numpy_ml/preprocessing/nlp.py",
    "chars": 47718,
    "preview": "\"\"\"Common preprocessing utilities for working with text data\"\"\"\nimport re\nimport heapq\nimport os.path as op\nfrom collect"
  },
  {
    "path": "numpy_ml/rl_models/README.md",
    "chars": 1382,
    "preview": "# RL Models\nThe `agents.py` module implements a number of standard reinforcement learning (RL) agents that\ncan be run on"
  },
  {
    "path": "numpy_ml/rl_models/__init__.py",
    "chars": 86,
    "preview": "from . import rl_utils\nfrom . import agents\nfrom . import trainer\nfrom . import tiles\n"
  },
  {
    "path": "numpy_ml/rl_models/agents.py",
    "chars": 67435,
    "preview": "\"\"\"Reinforcement learning agents that can be run on OpenAI gym environs\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom col"
  },
  {
    "path": "numpy_ml/rl_models/rl_utils.py",
    "chars": 15287,
    "preview": "\"\"\"Utilities for training and evaluating RL models on OpenAI gym environments\"\"\"\nimport warnings\nfrom itertools import p"
  },
  {
    "path": "numpy_ml/rl_models/tiles/__init__.py",
    "chars": 21,
    "preview": "from . import tiles3\n"
  },
  {
    "path": "numpy_ml/rl_models/tiles/tiles3.py",
    "chars": 4067,
    "preview": "\"\"\"\nTile Coding Software version 3.0beta\nby Rich Sutton\nbased on a program created by Steph Schaeffer and others\nExterna"
  },
  {
    "path": "numpy_ml/rl_models/trainer.py",
    "chars": 5277,
    "preview": "from time import time\nimport numpy as np\n\n\nclass Trainer(object):\n    def __init__(self, agent, env):\n        \"\"\"\n      "
  },
  {
    "path": "numpy_ml/tests/__init__.py",
    "chars": 46,
    "preview": "\"\"\"Unit tests for various numpy-ml modules\"\"\"\n"
  },
  {
    "path": "numpy_ml/tests/nn_torch_models.py",
    "chars": 87492,
    "preview": "# flake8: noqa\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport tensorflow as tf\n\nimport nump"
  },
  {
    "path": "numpy_ml/tests/test_glm.py",
    "chars": 2209,
    "preview": "# flake8: noqa\nimport numpy as np\n\nimport statsmodels.api as sm\nfrom numpy_ml.linear_models import GeneralizedLinearMode"
  },
  {
    "path": "numpy_ml/tests/test_linear_regression.py",
    "chars": 3061,
    "preview": "# flake8: noqa\nimport numpy as np\n\nfrom sklearn.linear_model import LinearRegression as LinearRegressionGold\n\nfrom numpy"
  },
  {
    "path": "numpy_ml/tests/test_naive_bayes.py",
    "chars": 2397,
    "preview": "# flake8: noqa\nimport numpy as np\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\n\nfro"
  },
  {
    "path": "numpy_ml/tests/test_ngram.py",
    "chars": 8272,
    "preview": "# flake8: noqa\nimport tempfile\n\nimport nltk\nimport numpy as np\n\nfrom ..preprocessing.nlp import tokenize_words\nfrom ..ng"
  },
  {
    "path": "numpy_ml/tests/test_nn.py",
    "chars": 78225,
    "preview": "# flake8: noqa\nimport time\nfrom copy import deepcopy\n\nimport numpy as np\nfrom numpy.testing import assert_almost_equal\n\n"
  },
  {
    "path": "numpy_ml/tests/test_nn_activations.py",
    "chars": 9942,
    "preview": "# flake8: noqa\nimport time\nimport numpy as np\n\nfrom numpy.testing import assert_almost_equal\nfrom scipy.special import e"
  },
  {
    "path": "numpy_ml/tests/test_nonparametric.py",
    "chars": 3288,
    "preview": "# flake8: noqa\nimport numpy as np\n\nfrom sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier\nfrom sklearn."
  },
  {
    "path": "numpy_ml/tests/test_preprocessing.py",
    "chars": 6510,
    "preview": "# flake8: noqa\nfrom collections import Counter\n\n# gold-standard imports\nimport huffman\nimport numpy as np\n\nfrom scipy.ff"
  },
  {
    "path": "numpy_ml/tests/test_trees.py",
    "chars": 12388,
    "preview": "# flake8: noqa\nimport numpy as np\n\nfrom sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\nfrom sklea"
  },
  {
    "path": "numpy_ml/tests/test_utils.py",
    "chars": 9356,
    "preview": "# flake8: noqa\nimport numpy as np\n\nimport scipy\nimport networkx as nx\n\nfrom sklearn.neighbors import BallTree as sk_Ball"
  },
  {
    "path": "numpy_ml/trees/README.md",
    "chars": 888,
    "preview": "# Tree-Based Models\nThis module implements:\n\n1. [Decision trees](https://en.wikipedia.org/wiki/Decision_tree_learning) f"
  },
  {
    "path": "numpy_ml/trees/__init__.py",
    "chars": 77,
    "preview": "from . import losses\nfrom .dt import *\nfrom .rf import *\nfrom .gbdt import *\n"
  },
  {
    "path": "numpy_ml/trees/dt.py",
    "chars": 8019,
    "preview": "import numpy as np\n\n\nclass Node:\n    def __init__(self, left, right, rule):\n        self.left = left\n        self.right "
  },
  {
    "path": "numpy_ml/trees/gbdt.py",
    "chars": 6602,
    "preview": "import numpy as np\n\nfrom .dt import DecisionTree\nfrom .losses import MSELoss, CrossEntropyLoss\n\n\ndef to_one_hot(labels, "
  },
  {
    "path": "numpy_ml/trees/losses.py",
    "chars": 1799,
    "preview": "import numpy as np\n\n#######################################################################\n#                           "
  },
  {
    "path": "numpy_ml/trees/rf.py",
    "chars": 3543,
    "preview": "import numpy as np\nfrom .dt import DecisionTree\n\n\ndef bootstrap_sample(X, Y):\n    N, M = X.shape\n    idxs = np.random.ch"
  },
  {
    "path": "numpy_ml/utils/README.md",
    "chars": 1704,
    "preview": "# Utilities\n\nThe utilities module implements a number of useful functions and objects that\npower other ML algorithms acr"
  },
  {
    "path": "numpy_ml/utils/__init__.py",
    "chars": 191,
    "preview": "\"\"\"Utilities module\"\"\"\n\nfrom . import testing\nfrom . import data_structures\nfrom . import distance_metrics\nfrom . import"
  },
  {
    "path": "numpy_ml/utils/data_structures.py",
    "chars": 18440,
    "preview": "import heapq\nfrom copy import copy\nfrom collections import Hashable\n\nimport numpy as np\n\nfrom .distance_metrics import e"
  },
  {
    "path": "numpy_ml/utils/distance_metrics.py",
    "chars": 3159,
    "preview": "import numpy as np\n\n\ndef euclidean(x, y):\n    \"\"\"\n    Compute the Euclidean (`L2`) distance between two real vectors\n\n  "
  },
  {
    "path": "numpy_ml/utils/graphs.py",
    "chars": 11497,
    "preview": "from abc import ABC, abstractmethod\nfrom collections import defaultdict\nfrom itertools import combinations, permutations"
  },
  {
    "path": "numpy_ml/utils/kernels.py",
    "chars": 11972,
    "preview": "import re\nfrom abc import ABC, abstractmethod\n\nimport numpy as np\n\n\nclass KernelBase(ABC):\n    def __init__(self):\n     "
  },
  {
    "path": "numpy_ml/utils/misc.py",
    "chars": 593,
    "preview": "\"\"\"Miscellaneous utility functions\"\"\"\nimport numpy as np\n\n\ndef logsumexp(log_probs, axis=None):\n    \"\"\"\n    Redefine sci"
  },
  {
    "path": "numpy_ml/utils/testing.py",
    "chars": 4306,
    "preview": "\"\"\"Utilities for writing unit tests\"\"\"\nimport numbers\nimport numpy as np\n\n\n#############################################"
  },
  {
    "path": "numpy_ml/utils/windows.py",
    "chars": 5113,
    "preview": "import numpy as np\n\n\ndef blackman_harris(window_len, symmetric=False):\n    \"\"\"\n    The Blackman-Harris window.\n\n    Note"
  },
  {
    "path": "requirements-dev.txt",
    "chars": 177,
    "preview": "numpy\nscipy\nsklearn\ntorch\nnetworkx\nmatplotlib\nseaborn\ntensorflow\ngym\nkeras\nhuffman\nlibrosa==0.7.2\nllvmlite==0.32.1\nnumba"
  },
  {
    "path": "requirements-test.txt",
    "chars": 147,
    "preview": "numpy\nscipy\nsklearn\ntorch\nnetworkx\ntensorflow\nkeras\ngym\nhuffman\nlibrosa==0.7.2\nllvmlite==0.32.1\nnumba==0.45.0\nnltk\nhmmle"
  },
  {
    "path": "requirements.txt",
    "chars": 12,
    "preview": "numpy\nscipy\n"
  },
  {
    "path": "setup.py",
    "chars": 1386,
    "preview": "# flake8: noqa\nfrom codecs import open\n\nfrom setuptools import setup, find_packages\n\nwith open(\"README.md\", encoding=\"ut"
  },
  {
    "path": "tox.ini",
    "chars": 116,
    "preview": "[tox]\nenvlist = py36,py38\nskip_missing_interpreters=true\n[testenv]\ndeps = -rrequirements-test.txt\ncommands = pytest\n"
  }
]

About this extraction

This page contains the full source code of the ddbourgin/numpy-ml GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 194 files (1.3 MB), approximately 332.9k tokens, and a symbol index with 1357 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo