Repository: keras-team/autokeras
Branch: master
Commit: a2446cf16edc
Files: 184
Total size: 601.8 KB
Directory structure:
gitextract_87bybot9/
├── .devcontainer/
│ ├── Dockerfile
│ ├── devcontainer.json
│ └── setup.sh
├── .dockerignore
├── .github/
│ ├── CODEOWNERS
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── custom.md
│ │ ├── feature_request.md
│ │ └── new-task-template.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── dependabot.yml
│ └── workflows/
│ └── actions.yml
├── .gitignore
├── LICENSE
├── README.md
├── RELEASE.md
├── autokeras/
│ ├── __init__.py
│ ├── adapters/
│ │ ├── __init__.py
│ │ ├── input_adapters.py
│ │ ├── input_adapters_test.py
│ │ ├── output_adapters.py
│ │ └── output_adapters_test.py
│ ├── analysers/
│ │ ├── __init__.py
│ │ ├── input_analysers.py
│ │ ├── input_analysers_test.py
│ │ ├── output_analysers.py
│ │ └── output_analysers_test.py
│ ├── auto_model.py
│ ├── auto_model_test.py
│ ├── blocks/
│ │ ├── __init__.py
│ │ ├── basic.py
│ │ ├── basic_test.py
│ │ ├── heads.py
│ │ ├── heads_test.py
│ │ ├── preprocessing.py
│ │ ├── preprocessing_test.py
│ │ ├── reduction.py
│ │ ├── reduction_test.py
│ │ ├── wrapper.py
│ │ └── wrapper_test.py
│ ├── conftest.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── adapter.py
│ │ ├── adapter_test.py
│ │ ├── analyser.py
│ │ ├── analyser_test.py
│ │ ├── block.py
│ │ ├── block_test.py
│ │ ├── head.py
│ │ ├── head_test.py
│ │ ├── hyper_preprocessor.py
│ │ ├── io_hypermodel.py
│ │ ├── named_hypermodel.py
│ │ ├── node.py
│ │ ├── node_test.py
│ │ ├── preprocessor.py
│ │ ├── serializable.py
│ │ ├── tuner.py
│ │ └── tuner_test.py
│ ├── graph.py
│ ├── graph_test.py
│ ├── hyper_preprocessors.py
│ ├── hyper_preprocessors_test.py
│ ├── integration_tests/
│ │ ├── __init__.py
│ │ ├── functional_api_test.py
│ │ ├── io_api_test.py
│ │ └── task_api_test.py
│ ├── keras_layers.py
│ ├── keras_layers_test.py
│ ├── nodes.py
│ ├── nodes_test.py
│ ├── pipeline.py
│ ├── pipeline_test.py
│ ├── preprocessors/
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── common_test.py
│ │ ├── encoders.py
│ │ ├── encoders_test.py
│ │ ├── postprocessors.py
│ │ └── postprocessors_test.py
│ ├── tasks/
│ │ ├── __init__.py
│ │ ├── image.py
│ │ ├── image_test.py
│ │ ├── structured_data.py
│ │ ├── structured_data_test.py
│ │ ├── text.py
│ │ └── text_test.py
│ ├── test_utils.py
│ ├── tuners/
│ │ ├── __init__.py
│ │ ├── bayesian_optimization.py
│ │ ├── greedy.py
│ │ ├── greedy_test.py
│ │ ├── hyperband.py
│ │ ├── hyperband_test.py
│ │ ├── random_search.py
│ │ ├── random_search_test.py
│ │ ├── task_specific.py
│ │ └── task_specific_test.py
│ └── utils/
│ ├── __init__.py
│ ├── data_utils.py
│ ├── data_utils_test.py
│ ├── io_utils.py
│ ├── layer_utils.py
│ ├── types.py
│ ├── utils.py
│ └── utils_test.py
├── benchmark/
│ ├── README.md
│ ├── __init__.py
│ ├── datasets/
│ │ ├── titanic_test.csv
│ │ └── titanic_train.csv
│ ├── experiments/
│ │ ├── __init__.py
│ │ ├── experiment.py
│ │ ├── image.py
│ │ ├── structured_data.py
│ │ └── text.py
│ ├── performance.py
│ └── run.py
├── codecov.yml
├── docker/
│ ├── Dockerfile
│ ├── Makefile
│ ├── README.md
│ ├── devel.Dockerfile
│ ├── pre-commit.Dockerfile
│ └── pre_commit.py
├── docs/
│ ├── README.md
│ ├── autogen.py
│ ├── ipynb/
│ │ ├── customized.ipynb
│ │ ├── export.ipynb
│ │ ├── image_classification.ipynb
│ │ ├── image_regression.ipynb
│ │ ├── multi.ipynb
│ │ ├── structured_data_classification.ipynb
│ │ ├── structured_data_regression.ipynb
│ │ ├── text_classification.ipynb
│ │ └── text_regression.ipynb
│ ├── keras_autodoc/
│ │ ├── __init__.py
│ │ ├── autogen.py
│ │ ├── docstring.py
│ │ ├── examples.py
│ │ ├── gathering_members.py
│ │ ├── get_signatures.py
│ │ └── utils.py
│ ├── mkdocs.yml
│ ├── py/
│ │ ├── customized.py
│ │ ├── export.py
│ │ ├── image_classification.py
│ │ ├── image_regression.py
│ │ ├── multi.py
│ │ ├── structured_data_classification.py
│ │ ├── structured_data_regression.py
│ │ ├── text_classification.py
│ │ └── text_regression.py
│ ├── requirements.txt
│ ├── run_py_files.sh
│ ├── templates/
│ │ ├── about.md
│ │ ├── index.md
│ │ ├── install.md
│ │ ├── stylesheets/
│ │ │ └── extra.css
│ │ └── tutorial/
│ │ ├── faq.md
│ │ └── overview.md
│ └── tutobooks.py
├── examples/
│ ├── automodel_with_cnn.py
│ ├── celeb_age.py
│ ├── cifar10.py
│ ├── imdb.py
│ ├── mnist.py
│ ├── new_pop.py
│ └── reuters.py
├── pyproject.toml
├── setup.cfg
└── shell/
├── contributors.py
├── contributors.sh
├── copyright.txt
├── cov.sh
├── coverage.sh
├── docs.sh
├── format.sh
├── generate_json.js
├── lint.sh
├── perf.sh
├── pre-commit.sh
└── pypi.sh
================================================
FILE CONTENTS
================================================
================================================
FILE: .devcontainer/Dockerfile
================================================
# Development-container image for working on this repository.
# Starts from the VS Code devcontainers Python 3.10 base image.
FROM mcr.microsoft.com/vscode/devcontainers/python:3.10
# Place the setup script at the image root; devcontainer.json invokes it
# through its "postCreateCommand" ("sh /setup.sh") once the container exists.
COPY setup.sh /setup.sh
================================================
FILE: .devcontainer/devcontainer.json
================================================
{
"dockerFile": "Dockerfile",
"postCreateCommand": "sh /setup.sh",
"customizations": {
"vscode": {
"settings": {
"python.linting.enabled": true,
"python.linting.flake8Enabled": true,
"python.linting.pylintEnabled": false,
"python.testing.pytestEnabled": true,
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": true
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"editor.rulers": [
80
]
},
"extensions": [
"ms-python.python",
"ms-python.isort",
"ms-python.flake8",
"ms-python.black-formatter"
]
}
},
"features": {
"ghcr.io/devcontainers/features/github-cli:1": {}
}
}
================================================
FILE: .devcontainer/setup.sh
================================================
# Post-create setup for the dev container (run by devcontainer.json as
# "sh /setup.sh").

# Upgrade pip itself before installing anything else.
sudo pip install --upgrade pip
# Install the project in editable mode together with its "tests" extra.
sudo pip install -e ".[tests]"
# Create a git pre-commit hook whose only content is a call to shell/lint.sh
# (this overwrites any existing .git/hooks/pre-commit file).
echo "sh shell/lint.sh" > .git/hooks/pre-commit
# Git only runs hooks that are executable.
chmod a+x .git/hooks/pre-commit
================================================
FILE: .dockerignore
================================================
.git
.github
*.Dockerfile
================================================
FILE: .github/CODEOWNERS
================================================
* @haifeng-jin @fchollet
================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jin@tamu.edu. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing Guide
Contributions are welcome, and greatly appreciated!
This page is only a guide of the best practices of contributing code to AutoKeras.
The best way to contribute is to join our community by reading [this](https://autokeras.com/#contributing-code).
We will get you started right away.
If you are a beginner, look for the issues tagged [good first issue](https://github.com/keras-team/autokeras/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22).
## Pull Request Guide
1. Is this the first pull request that you're making with GitHub? If so, read the guide [Making a pull request to an open-source project](https://github.com/gabrieldemarmiesse/getting_started_open_source).
2. Include "resolves #issue_number" in the description of the pull request if applicable. Briefly describe your contribution.
3. Submit the pull request from the first day of your development and create it as a [draft pull request](https://github.blog/2019-02-14-introducing-draft-pull-requests/). Click `ready for review` when you have finished and all the checks have passed.
4. For the case of bug fixes, add new test cases which would fail before your bug fix.
## Setup Environment
We introduce 3 different options: **GitHub Codespaces**, **VS Code & Dev Containers**, **the general setup**.
You can choose based on your preference.
### Option 1: GitHub Codespaces
You can simply open the repository in GitHub Codespaces.
The environment is already set up there.
### Option 2: VS Code & Dev Containers
Open VS Code.
Install the `Dev Containers` extension.
Press `F1` key. Enter `Dev Containers: Open Folder in Container...` to open the repository root folder.
The environment is already set up there.
### Option 3: The General Setup
Install [Virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).
Create a new virtualenv named `ak` based on python3.
```
mkvirtualenv -p python3 ak
```
Please use this virtualenv for development.
Clone the repo. Go to the repo directory.
Run the following commands.
```
workon ak
pip install -e ".[tests]"
pip uninstall autokeras
add2virtualenv .
```
## Run Tests
### GitHub Codespaces or VS Code & Dev Containers
If you are using "GitHub Codespaces" or "VS Code & Dev Containers",
you can simply open any `*_test.py` file under the `autokeras` directory,
and wait a few seconds, you will see the test tab on the left of the window.
### General Setup
If you are using the general setup, activate the virtualenv and go to the repo directory.
Then run the following commands to run the tests.
Run all the tests.
```
pytest autokeras
```
Run all the unit tests.
```
pytest autokeras --ignore=autokeras/integration_tests
```
Run all the integration tests.
```
pytest autokeras/integration_tests
```
## Code Style
You can run the following manually every time you want to format your code.
1. Run `shell/format.sh` to format your code.
2. Run `shell/lint.sh` to check.
## Docstrings
Docstrings should follow our style.
Just check the style of other docstrings in AutoKeras.
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
labels: ["bug report"]
title: "Bug: "
---
### Bug Description
### Bug Reproduction
Code for reproducing the bug:
Data used by the code:
### Expected Behavior
### Setup Details
Include the details about the versions of:
- OS type and version:
- Python:
- autokeras:
- keras-tuner:
- scikit-learn:
- numpy:
- pandas:
- tensorflow:
### Additional context
================================================
FILE: .github/ISSUE_TEMPLATE/custom.md
================================================
---
name: Custom issue template
about: Describe this issue template's purpose here.
---
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
labels: ["feature request"]
title: "Feature: "
---
### Feature Description
### Code Example
```python
```
### Reason
### Solution
================================================
FILE: .github/ISSUE_TEMPLATE/new-task-template.md
================================================
---
name: New Task Template
about: Add a new machine learning task
labels: ["algorithm"]
title: "New Task: "
---
### Suggested Name
### Task Description
### Evaluation Metrics
### Benchmark Datasets
### Reason
### Solution
### Additional Context
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
### Which issue(s) does this Pull Request fix?
resolves #000
### Details of the Pull Request
================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration: checks the pip dependencies declared at the
# repository root once a day at 11:00 and keeps at most 10 update pull
# requests open at a time.
version: 2
updates:
- package-ecosystem: pip
directory: "/"
schedule:
interval: daily
time: "11:00"
open-pull-requests-limit: 10
# Version ranges listed here are skipped: no update pull request is opened
# for them. typeguard appears twice, once per ignored range.
ignore:
- dependency-name: sphinx
versions:
- ">= 3.1.a, < 3.2"
- dependency-name: typeguard
versions:
- ">= 2.11.a, < 2.12"
- dependency-name: typeguard
versions:
- ">= 2.12.a, < 2.13"
================================================
FILE: .github/workflows/actions.yml
================================================
# CI workflow. Triggered by pushes to master, by every pull request, and when
# a release is created. Every job sets KERAS_BACKEND=torch and runs on
# ubuntu-latest.
name: Tests
on:
push:
branches: [ master ]
pull_request:
release:
types: [created]
jobs:
build:
# Runs the pytest suite with coverage on Python 3.10 (CPU-only torch wheel)
# and uploads coverage.xml to Codecov. The pip cache is keyed on the hash of
# pyproject.toml.
name: Run tests
runs-on: ubuntu-latest
env:
KERAS_BACKEND: torch
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Get pip cache dir
id: pip-cache
run: |
python -m pip install --upgrade pip setuptools
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@v3
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
- name: Install dependencies
run: |
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install -e ".[tests]" --progress-bar off
- name: Test with pytest
run: |
pytest --cov=autokeras --cov-report xml:coverage.xml
- name: Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.xml
flags: unittests
fail_ci_if_error: true
format:
# Checks the code format by running shell/pre-commit.sh on a fresh checkout.
name: Check the code format
runs-on: ubuntu-latest
env:
KERAS_BACKEND: torch
steps:
- uses: actions/checkout@v3
- name: Run pre-commit
run: bash shell/pre-commit.sh
build-docs:
# Installs the package plus docs/requirements.txt, then runs autogen.py and
# `mkdocs build` inside docs/ to verify that the documentation builds.
name: Build the docs
runs-on: ubuntu-latest
env:
KERAS_BACKEND: torch
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install -e .
pip install -r docs/requirements.txt
- name: Build the docs
run: |
cd docs
python autogen.py
mkdocs build
deploy:
# Publishing job: runs only for a created release and only after the build,
# format and build-docs jobs. Builds the distribution with `python -m build`
# and uploads dist/* with twine, using credentials from repository secrets.
needs: [build, format, build-docs]
if: github.event_name == 'release' && github.event.action == 'created'
runs-on: ubuntu-latest
env:
KERAS_BACKEND: torch
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip build twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python -m build
twine upload dist/*
================================================
FILE: .gitignore
================================================
# vim swp files
*.swp
# caffe/pytorch model files
*.pth
# Mkdocs
/docs/sources
/docs/site
.DS_Store
.vscode
settings.json
.idea
.pytest_cache
/experiments
# resource temp folder
tests/resources/temp/*
!tests/resources/temp/.gitkeep
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
cov.xml
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
.static_storage/
.media/
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
examples/text_cnn/glove_embedding/
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2020 The AutoKeras Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
[](https://github.com/keras-team/autokeras/actions?query=workflow%3ATests+branch%3Amaster)
[](https://codecov.io/gh/keras-team/autokeras)
[](https://badge.fury.io/py/autokeras)
[](https://github.com/keras-team/autokeras/issues)
Official Website: [autokeras.com](https://autokeras.com)
##
AutoKeras: An AutoML system based on Keras.
It is developed by DATA Lab at Texas A&M University.
The goal of AutoKeras is to make machine learning accessible to everyone.
## Learning resources
* A short example.
```python
import autokeras as ak
clf = ak.ImageClassifier()
clf.fit(x_train, y_train)
results = clf.predict(x_test)
```
* [Official website tutorials](https://autokeras.com/tutorial/overview/).
* The book of [*Automated Machine Learning in Action*](https://www.manning.com/books/automated-machine-learning-in-action?query=automated&utm_source=jin&utm_medium=affiliate&utm_campaign=affiliate&a_aid=jin).
* The LiveProjects of [*Image Classification with AutoKeras*](https://www.manning.com/liveprojectseries/autokeras-ser).
 
 
## Installation
To install the package, please use the `pip` installation as follows:
```shell
pip3 install autokeras
```
Please follow the [installation guide](https://autokeras.com/install) for more details.
**Note:** Currently, AutoKeras is only compatible with **Python >= 3.7** and **TensorFlow >= 2.8.0**.
## Community
Ask your questions on our [GitHub Discussions](https://github.com/keras-team/autokeras/discussions).
## Contributing Code
Here is how we manage our project.
We pick the critical issues to work on from [GitHub issues](https://github.com/keras-team/autokeras/issues).
They will be added to this [Project](https://github.com/keras-team/autokeras/projects/3).
Some of the issues will then be added to the [milestones](https://github.com/keras-team/autokeras/milestones),
which are used to plan for the releases.
Refer to our [Contributing Guide](https://autokeras.com/contributing/) to learn the best practices.
Thank all the contributors!
[](https://github.com/keras-team/autokeras/graphs/contributors)
## Cite this work
Haifeng Jin, François Chollet, Qingquan Song, and Xia Hu. "AutoKeras: An AutoML Library for Deep Learning." *Journal of Machine Learning Research* 24.6 (2023): 1-6. ([Download](http://jmlr.org/papers/v24/20-1355.html))
BibTeX entry:
```bibtex
@article{JMLR:v24:20-1355,
author = {Haifeng Jin and François Chollet and Qingquan Song and Xia Hu},
title = {AutoKeras: An AutoML Library for Deep Learning},
journal = {Journal of Machine Learning Research},
year = {2023},
volume = {24},
number = {6},
pages = {1--6},
url = {http://jmlr.org/papers/v24/20-1355.html}
}
```
## Acknowledgements
The authors gratefully acknowledge the D3M program of the Defense Advanced Research Projects Agency (DARPA) administered through AFRL contract FA8750-17-2-0116; the Texas A&M College of Engineering, and Texas A&M University.
================================================
FILE: RELEASE.md
================================================
# Release v2.0.0
## Breaking changes
* Requires `keras>=3.0.0` instead of `tf.keras`.
* Removed the structured data related tasks by removing the following public
APIs:
* `CategoricalToNumerical`
* `MultiCategoryEncoding`
* `StructuredDataInput`
* `StructuredDataBlock`
* `StructuredDataClassifier`
* `StructuredDataRegressor`
* Removed the Time series related tasks by removing the following public APIs:
* `TimeseriesInput`
* `TimeseriesForecaster`
* Reduced search space of Text related tasks by removing the following blocks.
* `Embedding`
* `TextToIntSequence`
* `TextToNgramVector`
* `Transformer`
# Release v1.1.0
## Breaking changes
* This only affects you if you use `BertTokenizer` or `BertEncoder` in AutoKeras
explicitly. You are not affected if you only use `BertBlock`, `TextClassifier`
or `TextRegressor`. Removed the AutoKeras implementation of `BertTokenizer`
and `BertEncoder`. Use `keras-nlp` implementation instead.
## New features
## Bug fixes
* Now also support `numpy>=1.24`.
================================================
FILE: autokeras/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.auto_model import AutoModel
from autokeras.blocks import ClassificationHead
from autokeras.blocks import ConvBlock
from autokeras.blocks import DenseBlock
from autokeras.blocks import EfficientNetBlock
from autokeras.blocks import Embedding
from autokeras.blocks import Flatten
from autokeras.blocks import ImageAugmentation
from autokeras.blocks import ImageBlock
from autokeras.blocks import Merge
from autokeras.blocks import Normalization
from autokeras.blocks import RegressionHead
from autokeras.blocks import ResNetBlock
from autokeras.blocks import RNNBlock
from autokeras.blocks import SpatialReduction
from autokeras.blocks import StructuredDataBlock
from autokeras.blocks import TemporalReduction
from autokeras.blocks import TextBlock
from autokeras.blocks import XceptionBlock
from autokeras.engine.block import Block
from autokeras.engine.head import Head
from autokeras.engine.node import Node
from autokeras.keras_layers import CastToFloat32
from autokeras.keras_layers import ExpandLastDim
from autokeras.nodes import ImageInput
from autokeras.nodes import Input
from autokeras.nodes import StructuredDataInput
from autokeras.nodes import TextInput
from autokeras.tasks import ImageClassifier
from autokeras.tasks import ImageRegressor
from autokeras.tasks import StructuredDataClassifier
from autokeras.tasks import StructuredDataRegressor
from autokeras.tasks import TextClassifier
from autokeras.tasks import TextRegressor
from autokeras.tuners import BayesianOptimization
from autokeras.tuners import Greedy
from autokeras.tuners import Hyperband
from autokeras.tuners import RandomSearch
# Version string of the package.
__version__ = "3.0.0"
# Classes imported from `autokeras.keras_layers`, keyed by their class name.
# NOTE(review): presumably meant to be passed as `custom_objects` when
# loading a saved Keras model -- confirm against the callers.
CUSTOM_OBJECTS = {
"CastToFloat32": CastToFloat32,
"ExpandLastDim": ExpandLastDim,
}
================================================
FILE: autokeras/adapters/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.adapters.input_adapters import ImageAdapter
from autokeras.adapters.input_adapters import InputAdapter
from autokeras.adapters.input_adapters import StructuredDataAdapter
from autokeras.adapters.input_adapters import TextAdapter
from autokeras.adapters.output_adapters import ClassificationAdapter
from autokeras.adapters.output_adapters import RegressionAdapter
================================================
FILE: autokeras/adapters/input_adapters.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras.engine import adapter as adapter_module
class InputAdapter(adapter_module.Adapter):
    """Adapter whose `check` validates the data given to `Input`."""

    def check(self, x):
        """Verify that `x` is a numerical `numpy.ndarray`.

        # Arguments
            x: The data to validate.

        # Raises
            TypeError: If `x` is not a `numpy.ndarray`, or if its dtype is
                not a sub-dtype of `numpy.number`.
        """
        if not isinstance(x, np.ndarray):
            raise TypeError(
                "Expect the data to Input to be numpy.ndarray, "
                "but got {type}.".format(type=type(x))
            )
        # From here on `x` is known to be an ndarray.
        if np.issubdtype(x.dtype, np.number):
            return
        raise TypeError(
            "Expect the data to Input to be numerical, but got "
            "{type}.".format(type=x.dtype)
        )
class ImageAdapter(adapter_module.Adapter):
    """Adapter whose `check` validates the data given to `ImageInput`."""

    def check(self, x):
        """Verify that `x` is a numerical `numpy.ndarray`.

        # Arguments
            x: The data to validate.

        # Raises
            TypeError: If `x` is not a `numpy.ndarray`, or if its dtype is
                not a sub-dtype of `numpy.number`.
        """
        if not isinstance(x, np.ndarray):
            raise TypeError(
                "Expect the data to ImageInput to be numpy.ndarray, "
                "but got {type}.".format(type=type(x))
            )
        # From here on `x` is known to be an ndarray.
        if np.issubdtype(x.dtype, np.number):
            return
        raise TypeError(
            "Expect the data to ImageInput to be numerical, but got "
            "{type}.".format(type=x.dtype)
        )
class TextAdapter(adapter_module.Adapter):
    """Adapter whose `check` validates the data given to `TextInput`."""

    def check(self, x):
        """Verify that `x` is a `numpy.ndarray`.

        # Arguments
            x: The data to validate.

        # Raises
            TypeError: If `x` is not a `numpy.ndarray`.
        """
        if isinstance(x, np.ndarray):
            return
        raise TypeError(
            "Expect the data to TextInput to be numpy.ndarray, "
            "but got {type}.".format(type=type(x))
        )
class StructuredDataAdapter(adapter_module.Adapter):
    """Adapter whose `check` only accepts `numpy.ndarray` data."""

    def check(self, x):
        """Verify that `x` is a `numpy.ndarray`.

        # Arguments
            x: The data to validate.

        # Raises
            TypeError: If `x` is not a `numpy.ndarray`. The message names
                the offending type and this adapter's class.
        """
        if isinstance(x, np.ndarray):
            return
        adapter_name = self.__class__.__name__
        raise TypeError(
            "Unsupported type {type} for "
            "{name}.".format(type=type(x), name=adapter_name)
        )
================================================
FILE: autokeras/adapters/input_adapters_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd
import pytest
from autokeras import test_utils
from autokeras.adapters import input_adapters
from autokeras.utils import data_utils
def test_image_input_adapter_transform_to_dataset():
    """`ImageAdapter.adapt` returns a numpy array for generated data."""
    images = test_utils.generate_data()
    adapted = input_adapters.ImageAdapter().adapt(images)
    assert isinstance(adapted, np.ndarray)
def test_image_input_unsupported_type():
    """A non-ndarray input makes `ImageAdapter.adapt` raise `TypeError`."""
    image_adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as exc_info:
        image_adapter.adapt("unknown")
    message = str(exc_info.value)
    assert "Expect the data to ImageInput to be numpy" in message
def test_image_input_numerical():
    """A string ndarray makes `ImageAdapter.adapt` raise `TypeError`."""
    non_numeric = np.array([[["unknown"]]])
    image_adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as exc_info:
        image_adapter.adapt(non_numeric)
    message = str(exc_info.value)
    assert "Expect the data to ImageInput to be numerical" in message
def test_input_type_error():
    """A non-ndarray input makes `InputAdapter.adapt` raise `TypeError`."""
    generic_adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as exc_info:
        generic_adapter.adapt("unknown")
    message = str(exc_info.value)
    assert "Expect the data to Input to be numpy" in message
def test_input_numerical():
    """A string ndarray makes `InputAdapter.adapt` raise `TypeError`."""
    non_numeric = np.array([[["unknown"]]])
    generic_adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as exc_info:
        generic_adapter.adapt(non_numeric)
    message = str(exc_info.value)
    assert "Expect the data to Input to be numerical" in message
def test_text_adapt_np():
    """`TextAdapter.adapt` keeps a 1-D string array as a 1-D ndarray."""
    sentences = np.array(["a b c", "b b c"])
    adapted = input_adapters.TextAdapter().adapt(sentences)
    assert data_utils.dataset_shape(adapted) == [2]
    assert isinstance(adapted, np.ndarray)
def test_text_input_type_error():
    """A non-ndarray input makes `TextAdapter.adapt` raise `TypeError`."""
    text_adapter = input_adapters.TextAdapter()
    with pytest.raises(TypeError) as exc_info:
        text_adapter.adapt("unknown")
    message = str(exc_info.value)
    assert "Expect the data to TextInput to be numpy" in message
def test_structured_data_input_unsupported_type_error():
    """A non-ndarray input makes `StructuredDataAdapter` raise `TypeError`."""
    with pytest.raises(TypeError) as exc_info:
        structured_adapter = input_adapters.StructuredDataAdapter()
        structured_adapter.adapt("unknown")
    message = str(exc_info.value)
    assert "Unsupported type" in message
def test_structured_data_input_transform_to_dataset():
    """`StructuredDataAdapter.adapt` returns an ndarray for CSV data."""
    frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    as_strings = frame.to_numpy().astype(str)
    adapted = input_adapters.StructuredDataAdapter().adapt(as_strings)
    assert isinstance(adapted, np.ndarray)
================================================
FILE: autokeras/adapters/output_adapters.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras.engine import adapter as adapter_module
class HeadAdapter(adapter_module.Adapter):
    """Base adapter for target data.

    # Arguments
        name: Name stored on the adapter and used in error messages.
        **kwargs: Forwarded to the parent adapter.
    """

    def __init__(self, name, **kwargs):
        super().__init__(**kwargs)
        self.name = name

    def check(self, dataset):
        """Verify that the target data is a `numpy.ndarray`.

        # Arguments
            dataset: The target data to validate.

        # Raises
            TypeError: If `dataset` is not a `numpy.ndarray`.
        """
        if isinstance(dataset, np.ndarray):
            return
        raise TypeError(
            f"Expect the target data of {self.name} to be"
            f" np.ndarray, but got {type(dataset)}."
        )
class ClassificationAdapter(HeadAdapter):
    """Target adapter for classification; adds nothing to `HeadAdapter`."""
class RegressionAdapter(HeadAdapter):
    """Target adapter for regression; adds nothing to `HeadAdapter`."""
================================================
FILE: autokeras/adapters/output_adapters_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest
from autokeras.adapters import output_adapters
def test_unsupported_types_error():
    """A non-ndarray target makes `ClassificationAdapter` raise `TypeError`."""
    head_adapter = output_adapters.ClassificationAdapter(name="a")
    with pytest.raises(TypeError) as exc_info:
        head_adapter.adapt(1)
    message = str(exc_info.value)
    assert "Expect the target data of a to be" in message
def test_reg_head_transform_1d_np():
    """`RegressionAdapter.adapt` returns an ndarray for 1-D targets."""
    targets = np.random.rand(10)
    head_adapter = output_adapters.RegressionAdapter(name="a")
    adapted = head_adapter.adapt(targets)
    assert isinstance(adapted, np.ndarray)
================================================
FILE: autokeras/analysers/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.analysers.input_analysers import CATEGORICAL
from autokeras.analysers.input_analysers import NUMERICAL
from autokeras.analysers.input_analysers import ImageAnalyser
from autokeras.analysers.input_analysers import InputAnalyser
from autokeras.analysers.input_analysers import StructuredDataAnalyser
from autokeras.analysers.input_analysers import TextAnalyser
from autokeras.analysers.output_analysers import ClassificationAnalyser
from autokeras.analysers.output_analysers import RegressionAnalyser
================================================
FILE: autokeras/analysers/input_analysers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras.engine import analyser
# Column-type labels assigned by `StructuredDataAnalyser.infer_column_types`.
CATEGORICAL = "categorical"
NUMERICAL = "numerical"
class InputAnalyser(analyser.Analyser):
    """Base class of the input analysers in this module."""

    def finalize(self):
        """Do nothing; subclasses override this to validate their data."""
        return None
class ImageAnalyser(InputAnalyser):
    """Analyser that validates the rank of image data."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def finalize(self):
        """Verify that the recorded shape has 3 or 4 dimensions.

        # Raises
            ValueError: If `self.shape` has neither 3 nor 4 entries.
        """
        rank = len(self.shape)
        if rank in (3, 4):
            return
        raise ValueError(
            "Expect the data to ImageInput to have shape (batch_size, "
            "height, width, channels) or (batch_size, height, width) "
            "dimensions, but got input shape {shape}".format(
                shape=self.shape
            )
        )
class TextAnalyser(InputAnalyser):
    """Analyser that validates the shape and dtype of text data."""

    def correct_shape(self):
        """Return whether the recorded shape is 1-D or 2-D with one column."""
        rank = len(self.shape)
        if rank == 1:
            return True
        return rank == 2 and self.shape[1] == 1

    def finalize(self):
        """Verify the recorded shape and dtype.

        # Raises
            ValueError: If `correct_shape()` is false.
            TypeError: If `self.dtype` is not equal to `"string"`.
        """
        if not self.correct_shape():
            raise ValueError(
                "Expect the data to TextInput to have shape "
                "(batch_size, 1), but "
                "got input shape {shape}.".format(shape=self.shape)
            )
        is_string = self.dtype == "string"
        if is_string:
            return
        raise TypeError(
            "Expect the data to TextInput to be strings, but got "
            "{type}.".format(type=self.dtype)
        )
class StructuredDataAnalyser(InputAnalyser):
    """Analyser that infers per-column types of 2-D structured data.

    # Arguments
        column_names: Optional list of column names. When `None`, `check()`
            fills it with the stringified column indices.
        column_types: Optional dict from column name to `CATEGORICAL` or
            `NUMERICAL`. Entries given here are kept as they are; only the
            columns missing from it are inferred.
        **kwargs: Forwarded to the parent analyser.
    """

    def __init__(self, column_names=None, column_types=None, **kwargs):
        super().__init__(**kwargs)
        self.column_names = column_names
        self.column_types = column_types
        # Variables for inferring column types.
        self.count_numerical = None
        self.count_categorical = None
        self.count_unique_numerical = []
        self.num_col = None

    def update(self, data):
        """Record per-column statistics of `data`.

        The statistics are rebuilt from scratch on every call, so `data` is
        expected to hold the whole dataset.

        # Arguments
            data: numpy.ndarray. Statistics are only collected when the
                recorded shape is 2-D.
        """
        super().update(data)
        # The super class set self.dtype to "string" based on the input numpy
        # array. However, the preprocessor will encode it to float32. So, set
        # self.dtype to "float32", which would be propagated to the Keras Input
        # node.
        self.dtype = "float32"
        if len(self.shape) != 2:
            # The wrong rank is reported later by `check()`.
            return
        # data is a numpy array containing all the data.
        self.num_col = data.shape[1]
        self.count_numerical = np.zeros(self.num_col)
        self.count_categorical = np.zeros(self.num_col)
        self.count_unique_numerical = [{} for _ in range(self.num_col)]
        for i in range(self.num_col):
            self._update_column(data[:, i], i)

    def _update_column(self, column_data, i):
        """Count numerical / non-numerical entries of column `i`.

        # Arguments
            column_data: 1-D numpy.ndarray holding the values of one column.
            i: Int. Index of the column the statistics are stored under.
        """

        def is_numerical(x):
            try:
                float(x)
                return True
            except (ValueError, TypeError):
                return False

        # `otypes` is passed explicitly: without it `np.vectorize` has to
        # call the function on the first element to find the output type
        # and raises a ValueError for a zero-length column.
        numerical_mask = np.vectorize(is_numerical, otypes=[bool])(column_data)
        num_numerical = np.sum(numerical_mask)
        self.count_numerical[i] = num_numerical
        self.count_categorical[i] = len(column_data) - num_numerical
        if not np.any(numerical_mask):
            return
        # Get numerical values
        numerical_values = column_data[numerical_mask]
        # Handle bytes
        numerical_values = np.array(
            [
                x.decode("utf-8") if isinstance(x, bytes) else x
                for x in numerical_values
            ]
        )
        numerical_floats = numerical_values.astype(float)
        unique_vals, counts = np.unique(numerical_floats, return_counts=True)
        self.count_unique_numerical[i] = dict(zip(unique_vals, counts))

    def finalize(self):
        """Validate the recorded shape and names, then infer column types."""
        self.check()
        self.infer_column_types()

    def get_input_name(self):
        """Return the input name used in error messages."""
        return "StructuredDataInput"

    def check(self):
        """Validate the recorded shape and fill in / validate `column_names`.

        # Raises
            ValueError: If the recorded shape is not 2-D, if `column_types`
                is given without `column_names`, or if the number of column
                names differs from the number of columns.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Expect the data to {input_name} to have shape "
                "(batch_size, num_features), but "
                "got input shape {shape}.".format(
                    input_name=self.get_input_name(), shape=self.shape
                )
            )

        # Fill in the column_names
        if self.column_names is None:
            if self.column_types:
                raise ValueError(
                    "column_names must be specified, if "
                    "column_types is specified."
                )
            self.column_names = [str(index) for index in range(self.shape[1])]

        # Check if column_names has the correct length.
        if len(self.column_names) != self.shape[1]:
            raise ValueError(
                "Expect column_names to have length {expect} "
                "but got {actual}.".format(
                    expect=self.shape[1], actual=len(self.column_names)
                )
            )

    def infer_column_types(self):
        """Infer a type for every column not already in `column_types`.

        A column is `CATEGORICAL` when it has any non-numerical entry, or
        when fewer than 5% of its numerical entries are distinct values.
        Otherwise it is `NUMERICAL`.
        """
        column_types = {}
        for i in range(self.num_col):
            name = self.column_names[i]
            if self.count_categorical[i] > 0:
                column_types[name] = CATEGORICAL
                continue
            num_values = self.count_numerical[i]
            # A column without any value would evaluate 0 / 0 here, which
            # yields nan and a RuntimeWarning. Skip the ratio in that case;
            # the outcome (NUMERICAL) is the one the nan comparison gave.
            if (
                num_values > 0
                and len(self.count_unique_numerical[i]) / num_values < 0.05
            ):
                column_types[name] = CATEGORICAL
            else:
                column_types[name] = NUMERICAL

        # Partial column_types is provided.
        if self.column_types is None:
            self.column_types = {}
        for key, value in column_types.items():
            if key not in self.column_types:
                self.column_types[key] = value
================================================
FILE: autokeras/analysers/input_analysers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
import pandas as pd
import pytest
from autokeras import test_utils
from autokeras.analysers import input_analysers
def test_structured_data_input_less_col_name_error():
    """Too few column names make the analyser raise `ValueError`."""
    with pytest.raises(ValueError) as exc_info:
        too_few_names = list(range(8))
        structured_analyser = input_analysers.StructuredDataAnalyser(
            column_names=too_few_names
        )
        features = np.random.rand(20, 10)
        structured_analyser.update(features)
        structured_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect column_names to have length" in message
def test_structured_data_infer_col_types():
    """Inferred column types equal `test_utils.COLUMN_TYPES`."""
    frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    frame.pop("survived")
    features = frame.values.astype(str)
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )
    structured_analyser.update(features)
    structured_analyser.finalize()
    assert structured_analyser.column_types == test_utils.COLUMN_TYPES
def test_dont_infer_specified_column_types():
    """A user-specified column type is not overwritten by inference."""
    partial_types = copy.copy(test_utils.COLUMN_TYPES)
    partial_types.pop("sex")
    partial_types["age"] = "categorical"
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=partial_types,
    )
    frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    frame.pop("survived")
    features = frame.values.astype(str)
    structured_analyser.update(features)
    structured_analyser.finalize()
    assert structured_analyser.column_types["age"] == "categorical"
def test_structured_data_input_with_illegal_dim():
    """4-D data makes `StructuredDataAnalyser` raise `ValueError`."""
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )
    wrong_rank = np.random.rand(100, 32, 32)
    with pytest.raises(ValueError) as exc_info:
        structured_analyser.update(wrong_rank)
        structured_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the data to StructuredDataInput to have shape" in message
def test_image_input_analyser_shape_is_list_of_int():
    """The recorded shape is a list whose entries are all ints."""
    image_analyser = input_analysers.ImageAnalyser()
    images = np.random.rand(100, 32, 32, 3)
    image_analyser.update(images)
    image_analyser.finalize()
    assert isinstance(image_analyser.shape, list)
    assert all(isinstance(dim, int) for dim in image_analyser.shape)
def test_image_input_with_three_dim():
    """3-D image data is accepted and its shape keeps three entries."""
    image_analyser = input_analysers.ImageAnalyser()
    images = np.random.rand(100, 32, 32)
    image_analyser.update(images)
    image_analyser.finalize()
    assert len(image_analyser.shape) == 3
def test_image_input_with_illegal_dim():
    """2-D data makes `ImageAnalyser` raise `ValueError`."""
    image_analyser = input_analysers.ImageAnalyser()
    wrong_rank = np.random.rand(100, 32)
    with pytest.raises(ValueError) as exc_info:
        image_analyser.update(wrong_rank)
        image_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the data to ImageInput to have shape" in message
def test_text_input_with_illegal_dim():
    """2-D data with many columns makes `TextAnalyser` raise `ValueError`."""
    text_analyser = input_analysers.TextAnalyser()
    wrong_shape = np.random.rand(100, 32)
    with pytest.raises(ValueError) as exc_info:
        text_analyser.update(wrong_shape)
        text_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the data to TextInput to have shape" in message
def test_text_analyzer_with_one_dim_doesnt_crash():
    """A 1-D string array passes `TextAnalyser` without raising."""
    sentences = np.array(["a b c", "b b c"])
    text_analyser = input_analysers.TextAnalyser()
    text_analyser.update(sentences)
    text_analyser.finalize()
def test_text_illegal_type_error():
    """Numerical data makes `TextAnalyser` raise `TypeError`."""
    text_analyser = input_analysers.TextAnalyser()
    numbers = np.random.rand(100, 1)
    with pytest.raises(TypeError) as exc_info:
        text_analyser.update(numbers)
        text_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the data to TextInput to be strings" in message
================================================
FILE: autokeras/analysers/output_analysers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras.engine import analyser
class TargetAnalyser(analyser.Analyser):
    """Base class of the target analysers in this module.

    # Arguments
        name: Optional name stored on the analyser; the subclasses in this
            module use it in their error messages.
        **kwargs: Forwarded to the parent analyser.
    """

    def __init__(self, name=None, **kwargs):
        super().__init__(**kwargs)
        self.name = name
class ClassificationAnalyser(TargetAnalyser):
    """Analyser for classification targets.

    # Arguments
        num_classes: Optional int. Number of classes; inferred in
            `finalize()` when falsy.
        multi_label: Boolean. Defaults to False.
        **kwargs: Forwarded to `TargetAnalyser`.
    """

    def __init__(self, num_classes=None, multi_label=False, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.label_encoder = None
        self.multi_label = multi_label
        self.labels = set()

    def update(self, data):
        """Record the distinct labels seen in single-column targets.

        # Arguments
            data: numpy.ndarray holding the targets.

        # Raises
            ValueError: If the recorded shape has more than 2 dimensions.
        """
        super().update(data)
        rank = len(self.shape)
        if rank > 2:
            raise ValueError(
                "Expect the target data for {name} to have shape "
                "(batch_size, num_classes), "
                "but got {shape}.".format(name=self.name, shape=self.shape)
            )
        # Targets with several columns contribute no labels.
        if rank > 1 and self.shape[1] > 1:
            return
        self.labels = self.labels.union(set(np.unique(data)))

    def finalize(self):
        """Infer `num_classes` if needed and validate the target shape.

        # Raises
            ValueError: If fewer than 2 classes are found, or if encoded
                targets do not have the expected shape.
        """
        # TODO: support raw string labels for multi-label.
        self.labels = sorted(self.labels)

        # Infer the num_classes if not specified.
        if not self.num_classes:
            if not self.encoded:
                self.num_classes = len(self.labels)
            elif len(self.shape) == 1 or self.shape[1:] == [1]:
                # Single column with 0s and 1s.
                self.num_classes = 2
            else:
                self.num_classes = self.shape[1]

        if self.num_classes < 2:
            raise ValueError(
                "Expect the target data for {name} to have "
                "at least 2 classes, but got {num_classes}.".format(
                    name=self.name, num_classes=self.num_classes
                )
            )

        # Check shape equals expected shape.
        expected = self.get_expected_shape()
        trailing = self.shape[1:]
        # 1-D targets are compared as if they had a single column.
        actual = [1] if len(trailing) == 0 else trailing
        if self.encoded and actual != expected:
            raise ValueError(
                "Expect the target data for {name} to have "
                "shape {expected}, but got {actual}.".format(
                    name=self.name, expected=expected, actual=self.shape[1:]
                )
            )

    def get_expected_shape(self):
        """Return the per-sample target shape implied by `num_classes`."""
        binary_single_label = self.num_classes == 2 and not self.multi_label
        if binary_single_label:
            return [1]
        return [self.num_classes]

    @property
    def encoded(self):
        """Whether either of the two encoded checks below holds."""
        return self.encoded_for_sigmoid or self.encoded_for_softmax

    @property
    def encoded_for_sigmoid(self):
        """Whether exactly two labels were recorded and they are 0 and 1."""
        return len(self.labels) == 2 and sorted(self.labels) == [0, 1]

    @property
    def encoded_for_softmax(self):
        """Whether the targets have more than one column."""
        return len(self.shape) > 1 and self.shape[1] > 1
class RegressionAnalyser(TargetAnalyser):
    """Analyser for regression targets.

    # Arguments
        output_dim: Optional int. When truthy, `finalize()` verifies the
            targets have this many columns.
        **kwargs: Forwarded to `TargetAnalyser`.
    """

    def __init__(self, output_dim=None, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def finalize(self):
        """Verify the targets match `output_dim` when it is specified.

        # Raises
            ValueError: If `output_dim` is truthy and differs from
                `expected_dim()`.
        """
        if not self.output_dim:
            return
        if self.expected_dim() == self.output_dim:
            return
        raise ValueError(
            "Expect the target data for {name} to have shape "
            "(batch_size, {output_dim}), "
            "but got {shape}.".format(
                name=self.name, output_dim=self.output_dim, shape=self.shape
            )
        )

    def expected_dim(self):
        """Return 1 for a 1-D shape, else the second entry of the shape."""
        return 1 if len(self.shape) == 1 else self.shape[1]
================================================
FILE: autokeras/analysers/output_analysers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest
from autokeras import test_utils
from autokeras.analysers import output_analysers
def test_clf_head_one_hot_shape_error():
    """10-class one-hot labels with `num_classes=9` raise `ValueError`."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=9
    )
    one_hot = test_utils.generate_one_hot_labels(num_classes=10)
    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(one_hot)
        clf_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the target data for a to have shape" in message
def test_clf_head_more_dim_error():
    """4-D targets make `ClassificationAnalyser` raise `ValueError`."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=9
    )
    wrong_rank = np.random.rand(100, 32, 32, 3)
    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(wrong_rank)
        clf_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the target data for a to have shape" in message
def test_wrong_num_classes_error():
    """3-column targets with `num_classes=5` raise `ValueError`."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=5
    )
    three_columns = np.random.rand(10, 3)
    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(three_columns)
        clf_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the target data for a to have shape" in message
def test_one_class_error():
    """Targets holding a single distinct label raise `ValueError`."""
    clf_analyser = output_analysers.ClassificationAnalyser(name="a")
    single_label = np.array(["a", "a", "a"])
    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(single_label)
        clf_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the target data for a to have at least 2 classes" in message
def test_infer_ten_classes():
    """`num_classes` is inferred as 10 from 10-class one-hot labels."""
    one_hot = test_utils.generate_one_hot_labels(num_classes=10)
    clf_analyser = output_analysers.ClassificationAnalyser(name="a")
    clf_analyser.update(one_hot)
    clf_analyser.finalize()
    assert clf_analyser.num_classes == 10
def test_infer_single_column_two_classes():
    """`num_classes` is inferred as 2 from a single column of 0s and 1s."""
    analyser = output_analysers.ClassificationAnalyser(name="a")
    # Use fixed labels that contain both classes. Ten random draws from
    # {0, 1} are all identical with probability 2 / 2**10, in which case
    # only one label is recorded and `finalize()` raises the "at least 2
    # classes" ValueError, making the test flaky.
    dataset = np.array([0, 1] * 5)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.num_classes == 2
def test_specify_five_classes():
    """5-column targets are accepted with `num_classes=5`."""
    five_columns = np.random.rand(10, 5)
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=5
    )
    clf_analyser.update(five_columns)
    clf_analyser.finalize()
    assert clf_analyser.num_classes == 5
def test_specify_two_classes_fit_single_column():
    """Single-column targets are accepted with `num_classes=2`."""
    one_column = np.random.rand(10, 1)
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=2
    )
    clf_analyser.update(one_column)
    clf_analyser.finalize()
    assert clf_analyser.num_classes == 2
def test_multi_label_two_classes_has_two_columns():
    """Two-column multi-label targets are reported as encoded."""
    two_columns = np.random.rand(10, 2)
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", multi_label=True
    )
    clf_analyser.update(two_columns)
    clf_analyser.finalize()
    assert clf_analyser.encoded
def test_reg_with_specified_output_dim_error():
    """2-column targets with `output_dim=3` raise `ValueError`."""
    reg_analyser = output_analysers.RegressionAnalyser(name="a", output_dim=3)
    two_columns = np.random.rand(10, 2)
    with pytest.raises(ValueError) as exc_info:
        reg_analyser.update(two_columns)
        reg_analyser.finalize()
    message = str(exc_info.value)
    assert "Expect the target data for a to have shape" in message
def test_reg_with_specified_output_dim_and_single_column_doesnt_crash():
    """Single-column targets pass with `output_dim=1` without raising."""
    one_column = np.random.rand(10, 1)
    reg_analyser = output_analysers.RegressionAnalyser(name="a", output_dim=1)
    reg_analyser.update(one_column)
    reg_analyser.finalize()
def test_regression_analyser_expected_dim_1d():
    """`expected_dim()` is 1 for a 1-D shape."""
    reg_analyser = output_analysers.RegressionAnalyser()
    one_dim_shape = [10]
    reg_analyser.shape = one_dim_shape
    assert reg_analyser.expected_dim() == 1
def test_regression_analyser_expected_dim_2d():
    """`expected_dim()` is the column count for a 2-D shape."""
    reg_analyser = output_analysers.RegressionAnalyser()
    two_dim_shape = [10, 3]
    reg_analyser.shape = two_dim_shape
    assert reg_analyser.expected_dim() == 3
================================================
FILE: autokeras/auto_model.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import keras
import numpy as np
import tree
from autokeras import blocks
from autokeras import graph as graph_module
from autokeras import pipeline
from autokeras import tuners
from autokeras.engine import head as head_module
from autokeras.engine import node as node_module
from autokeras.engine import tuner
from autokeras.nodes import Input
from autokeras.utils import data_utils
from autokeras.utils import utils
# Maps each tuner name accepted by `get_tuner_class` to the tuner class it
# selects.
TUNER_CLASSES = {
"bayesian": tuners.BayesianOptimization,
"random": tuners.RandomSearch,
"hyperband": tuners.Hyperband,
"greedy": tuners.Greedy,
}
def get_tuner_class(tuner):
    """Look up the tuner class registered under a tuner name.

    # Arguments
        tuner: String. One of the keys of `TUNER_CLASSES`.

    # Returns
        The tuner class stored in `TUNER_CLASSES` for `tuner`.

    # Raises
        ValueError: If `tuner` is not a string or not a known tuner name.
    """
    # The isinstance test comes first so that unhashable values never reach
    # the dictionary lookup.
    if not isinstance(tuner, str) or tuner not in TUNER_CLASSES:
        raise ValueError(
            'Expected the tuner argument to be one of "greedy", '
            '"random", "hyperband", or "bayesian", '
            "but got {tuner}".format(tuner=tuner)
        )
    return TUNER_CLASSES[tuner]
class AutoModel(object):
"""A Model defined by inputs and outputs.
AutoModel combines a HyperModel and a Tuner to tune the HyperModel.
The user can use it in a similar way to a Keras model since it
also has `fit()` and `predict()` methods.
The AutoModel has two use cases. In the first case, the user only specifies
the input nodes and output heads of the AutoModel. The AutoModel infers the
rest part of the model. In the second case, user can specify the high-level
architecture of the AutoModel by connecting the Blocks with the functional
API, which is the same as the Keras
[functional
API](https://keras.io/api/models/model/#with-the-functional-api).
# Example
```python
# The user only specifies the input nodes and output heads.
import autokeras as ak
ak.AutoModel(
inputs=[ak.ImageInput(), ak.TextInput()],
outputs=[ak.ClassificationHead(), ak.RegressionHead()]
)
```
```python
# The user specifies the high-level architecture.
import autokeras as ak
image_input = ak.ImageInput()
image_output = ak.ImageBlock()(image_input)
text_input = ak.TextInput()
text_output = ak.TextBlock()(text_input)
output = ak.Merge()([image_output, text_output])
classification_output = ak.ClassificationHead()(output)
regression_output = ak.RegressionHead()(output)
ak.AutoModel(
inputs=[image_input, text_input],
outputs=[classification_output, regression_output]
)
```
# Arguments
inputs: A list of Node instances.
The input node(s) of the AutoModel.
outputs: A list of Node or Head instances.
The output node(s) or head(s) of the AutoModel.
project_name: String. The name of the AutoModel. Defaults to
'auto_model'.
max_trials: Int. The maximum number of different Keras Models to try.
The search may finish before reaching the max_trials. Defaults to
100.
directory: String. The path to a directory for storing the search
outputs. Defaults to None, which would create a folder with the
name of the AutoModel in the current directory.
objective: String. Name of model metric to minimize
or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
tuner: String or subclass of AutoTuner. If string, it should be one of
'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
subclass of AutoTuner. Defaults to 'greedy'.
overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
project of the same name if one is found. Otherwise, overwrites the
project.
seed: Int. Random seed.
max_model_size: Int. Maximum number of scalars in the parameters of a
model. Models larger than this are rejected.
**kwargs: Any arguments supported by keras_tuner.Tuner.
"""
def __init__(
    self,
    inputs: Union[Input, List[Input]],
    outputs: Union[head_module.Head, node_module.Node, list],
    project_name: str = "auto_model",
    max_trials: int = 100,
    directory: Union[str, Path, None] = None,
    objective: str = "val_loss",
    tuner: Union[str, Type[tuner.AutoTuner]] = "greedy",
    overwrite: bool = False,
    seed: Optional[int] = None,
    max_model_size: Optional[int] = None,
    **kwargs
):
    """Build the search graph and instantiate the tuner.

    # Arguments
        inputs: Input node or nested structure of input nodes; flattened
            into a list.
        outputs: Node(s) or Head(s); flattened into a list.
        project_name: String. Forwarded to the tuner.
        max_trials: Int. Forwarded to the tuner.
        directory: String, Path or None. Forwarded to the tuner.
        objective: String. Forwarded to the tuner.
        tuner: Either a tuner name resolved through `get_tuner_class`, or a
            tuner class that is called directly.
        overwrite: Boolean. Forwarded to the tuner and stored on the
            instance.
        seed: Int or None. When not None, seeds numpy's global random
            state; it is also forwarded to the tuner.
        max_model_size: Int or None. Forwarded to the tuner.
        **kwargs: Extra keyword arguments forwarded to the tuner.

    # Raises
        ValueError: If `tuner` is a string that is not a known tuner name.
    """
    self.inputs = tree.flatten(inputs)
    self.outputs = tree.flatten(outputs)
    self.seed = seed
    # Compare against None (not truthiness) so an explicit seed of 0 is
    # honored instead of being silently skipped.
    if seed is not None:
        np.random.seed(seed)
    # TODO: Support passing a tuner instance.
    # Initialize the hyper_graph.
    graph = self._build_graph()
    if isinstance(tuner, str):
        tuner = get_tuner_class(tuner)
    self.tuner = tuner(
        hypermodel=graph,
        overwrite=overwrite,
        objective=objective,
        max_trials=max_trials,
        directory=directory,
        seed=self.seed,
        project_name=project_name,
        max_model_size=max_model_size,
        **kwargs
    )
    self.overwrite = overwrite
    # The head of each output node is the first block feeding into it.
    self._heads = [output_node.in_blocks[0] for output_node in self.outputs]
@property
def objective(self):
    """The `objective` attribute of the underlying tuner."""
    search_tuner = self.tuner
    return search_tuner.objective
@property
def max_trials(self):
    """The `max_trials` attribute of the underlying tuner."""
    search_tuner = self.tuner
    return search_tuner.max_trials
@property
def directory(self):
    """The `directory` attribute of the underlying tuner."""
    search_tuner = self.tuner
    return search_tuner.directory
@property
def project_name(self):
    """The `project_name` attribute of the underlying tuner."""
    search_tuner = self.tuner
    return search_tuner.project_name
def _assemble(self):
    """Build a default graph linking the inputs to the output heads.

    Every input node is passed through the block returned by its own
    `get_block()`. If that yields more than one node, they are combined
    with a `Merge` block. The resulting node is then fed to every output.
    """
    input_nodes = tree.flatten(self.inputs)
    heads = tree.flatten(self.outputs)

    branch_nodes = []
    for input_node in input_nodes:
        branch_nodes.append(input_node.get_block()(input_node))

    # A Merge block is only needed when there are several branches.
    if len(branch_nodes) > 1:
        shared_node = blocks.Merge()(branch_nodes)
    else:
        shared_node = branch_nodes[0]

    output_nodes = tree.flatten([head(shared_node) for head in heads])
    return graph_module.Graph(inputs=input_nodes, outputs=output_nodes)
def _build_graph(self):
    """Create the graph for the given inputs and outputs.

    If every output is a Node, the graph is built directly from the
    user-defined connections (functional API). If every output is a Head,
    the graph is assembled automatically and `self.outputs` is replaced by
    the output nodes of the assembled graph (input/output API).

    # Returns
        The `Graph` instance to be tuned.

    # Raises
        ValueError: If the outputs are neither all Nodes nor all Heads.
    """
    # Using functional API.
    if all(isinstance(output, node_module.Node) for output in self.outputs):
        return graph_module.Graph(inputs=self.inputs, outputs=self.outputs)

    # Using input/output API.
    if all(isinstance(output, head_module.Head) for output in self.outputs):
        # Clear session to reset get_uid(). The names of the blocks will
        # start to count from 1 for new blocks in a new AutoModel
        # afterwards. When initializing multiple AutoModel with Task API,
        # if not counting from 1 for each of the AutoModel, the predefined
        # hp values in task specific tuners would not match the names.
        keras.backend.clear_session()
        graph = self._assemble()
        self.outputs = graph.outputs
        keras.backend.clear_session()
        return graph

    # Previously this case fell through to `return graph` with `graph`
    # unbound, which surfaced as an UnboundLocalError.
    raise ValueError(
        "Expected the outputs to be either all Node instances or all Head "
        "instances, but got {types}".format(
            types=[type(output) for output in self.outputs]
        )
    )
def fit(
    self,
    x=None,
    y=None,
    batch_size=32,
    epochs=None,
    callbacks=None,
    validation_split=0.2,
    validation_data=None,
    verbose=1,
    **kwargs
):
    """Search for the best model and hyperparameters for the AutoModel.

    It will search for the best model based on the performances on
    validation data.

    # Arguments
        x: numpy.ndarray. Training data x.
        y: numpy.ndarray. Training data y.
        batch_size: Int. Number of samples per gradient update. Defaults to
            32.
        epochs: Int. The number of epochs to train each model during the
            search. If unspecified, by default we train for a maximum of
            1000 epochs, but we stop training if the validation loss stops
            improving for 10 epochs (unless you specified an EarlyStopping
            callback as part of the callbacks argument, in which case the
            EarlyStopping callback you specified will determine early
            stopping).
        callbacks: List of Keras callbacks to apply during training and
            validation.
        validation_split: Float between 0 and 1. Defaults to 0.2.
            Fraction of the training data to be used as validation data.
            The model will set apart this fraction of the training data,
            will not train on it, and will evaluate the loss and any model
            metrics on this data at the end of each epoch. The validation
            data is selected from the last samples in the `x` and `y` data
            provided, before shuffling. The best model found would be fit
            on the entire dataset including the validation data.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch. The model will not be trained
            on this data. `validation_data` will override
            `validation_split`. The type of the validation data should be
            the same as the training data. The best model found would be
            fit on the training dataset without the validation data.
        verbose: 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar,
            2 = one line per epoch. Note that the progress bar is not
            particularly useful when logged to a file, so verbose=2 is
            recommended when not running interactively (eg, in a production
            environment). Controls the verbosity of both KerasTuner search
            and
            [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method)
        **kwargs: Any arguments supported by
            [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

    # Returns
        history: A Keras History object corresponding to the best model.
            Its History.history attribute is a record of training
            loss values and metrics values at successive epochs, as well as
            validation loss values and validation metrics values (if
            applicable).

    # Raises
        ValueError: If neither `validation_data` nor a non-zero
            `validation_split` is given.
    """
    # Some source of validation data is mandatory for the search.
    if not (validation_data or validation_split):
        raise ValueError(
            "Either validation_data or a non-zero validation_split "
            "should be provided."
        )

    # Explicit validation data takes precedence over the split ratio.
    if validation_data:
        validation_split = 0

    dataset, validation_data = self._check_and_adapt(
        x=x, y=y, validation_data=validation_data
    )
    self._analyze_data(dataset)
    self._build_hyper_pipeline()

    # Carve the validation set out of the training data when none was given.
    if validation_data is None and validation_split:
        dataset, validation_data = data_utils.split_dataset(
            dataset, validation_split
        )

    x, y = dataset
    return self.tuner.search(
        x=x,
        y=y,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=validation_data,
        validation_split=validation_split,
        verbose=verbose,
        batch_size=batch_size,
        **kwargs
    )
def _adapt(self, dataset, hms):
    """Run each flattened data source through its matching adapter.

    # Arguments
        dataset: A single data source or a nested structure of them.
        hms: Objects exposing `get_adapter()`, paired positionally with the
            flattened sources.

    # Returns
        The single adapted source when there is exactly one, otherwise a
        tuple of all adapted sources.
    """
    adapted = [
        hm.get_adapter().adapt(source)
        for source, hm in zip(tree.flatten(dataset), hms)
    ]
    return adapted[0] if len(adapted) == 1 else tuple(adapted)
def _check_numpy_arrays(self, data, name, in_val=""):
    """Check if all elements in the nested structure are numpy arrays.

    # Raises
        ValueError: If any flattened element is not a `numpy.ndarray`. The
            message lists the type of every element.
    """
    leaves = tree.flatten(data)
    if any(not isinstance(leaf, np.ndarray) for leaf in leaves):
        message = "Expected {name}{in_val} to be a numpy array, got {type}"
        raise ValueError(
            message.format(
                name=name,
                in_val=in_val,
                type=[type(leaf) for leaf in leaves],
            )
        )
def _check_array_count(self, actual, expected, name, in_val):
"""Check if the number of arrays matches the expected count."""
if actual != expected:
raise ValueError(
"Expected {name}{in_val} to have {expected} arrays, "
"but got {actual}".format(
name=name,
in_val=in_val,
expected=expected,
actual=actual,
)
)
def _check_data_format(self, x, y, validation=False, predict=False):
    """Check if the dataset has the same number of IOs with the model.

    # Arguments
        x: Input data; every flattened element must be a numpy array.
        y: Target data or None. Only checked when not None.
        validation: Boolean. When True, error messages mention
            validation_data.
        predict: Boolean. When True, the count of `y` arrays is not checked.
    """
    in_val = " in validation_data" if validation else ""

    self._check_numpy_arrays(x, "x", in_val)
    if y is not None:
        self._check_numpy_arrays(y, "y", in_val)

    self._check_array_count(
        len(tree.flatten(x)), len(self.inputs), "x", in_val
    )

    # When predicting, y is not required.
    if predict or y is None:
        return
    self._check_array_count(
        len(tree.flatten(y)), len(self.outputs), "y", in_val
    )
def _analyze_data(self, dataset):
    """Feed the data to the analysers and configure inputs and heads.

    One analyser is obtained per input node and per head. Each flattened
    array updates its positionally matching analyser, all analysers are
    finalized, and each node/head is then configured from its analyser.
    """
    analysers = [node.get_analyser() for node in self.inputs]
    analysers.extend([head.get_analyser() for head in self._heads])

    for array, analyser in zip(tree.flatten(dataset), analysers):
        analyser.update(array)

    for analyser in analysers:
        analyser.finalize()

    for hm, analyser in zip(self.inputs + self._heads, analysers):
        hm.config_from_analyser(analyser)
def _build_hyper_pipeline(self):
    """Create the HyperPipeline and attach it to the tuner and hypermodel.

    The pipeline is built from the hyper preprocessors of every input node
    and of every head.
    """
    input_preprocessors = [
        node.get_hyper_preprocessors() for node in self.inputs
    ]
    output_preprocessors = [
        head.get_hyper_preprocessors() for head in self._heads
    ]
    self.tuner.hyper_pipeline = pipeline.HyperPipeline(
        inputs=input_preprocessors,
        outputs=output_preprocessors,
    )
    # The hypermodel shares the pipeline object stored on the tuner.
    self.tuner.hypermodel.hyper_pipeline = self.tuner.hyper_pipeline
def _check_and_adapt(self, x, y, validation_data):
# Convert training data.
self._check_data_format(x, y)
x = self._adapt(x, self.inputs)
y = self._adapt(y, self._heads)
# Convert validation data
if validation_data:
self._check_data_format(*validation_data, validation=True)
x_val, y_val = validation_data
x_val = self._adapt(x_val, self.inputs)
y_val = self._adapt(y_val, self._heads)
validation_data = (x_val, y_val)
return (x, y), validation_data
def predict(self, x, batch_size=32, verbose=1, **kwargs):
    """Predict the output for a given testing data.

    # Arguments
        x: Any allowed types according to the input node. Testing data.
        batch_size: Number of samples per batch.
            If unspecified, batch_size will default to 32.
        verbose: Verbosity mode. 0 = silent, 1 = progress bar.
            Controls the verbosity of
            [keras.Model.predict](https://keras.io/api/models/model_training_apis/#predict-method)
        **kwargs: Any arguments supported by keras.Model.predict.

    # Returns
        A list of numpy.ndarray objects or a single numpy.ndarray.
        The predicted results.
    """
    self._check_data_format(x, None, predict=True)
    adapted_x = self._adapt(x, self.inputs)

    best_pipeline = self.tuner.get_best_pipeline()
    best_model = self.tuner.get_best_model()

    # Preprocess the inputs, predict, then map predictions back.
    model_inputs = best_pipeline.transform_x(adapted_x)
    raw_predictions = utils.predict_with_adaptive_batch_size(
        model=best_model,
        batch_size=batch_size,
        x=model_inputs,
        verbose=verbose,
        **kwargs
    )
    return best_pipeline.postprocess(raw_predictions)
def evaluate(self, x, y=None, batch_size=32, verbose=1, **kwargs):
    """Evaluate the best model for the given data.

    # Arguments
        x: Any allowed types according to the input node. Testing data.
        y: Any allowed types according to the head. Testing targets.
            Defaults to None.
        batch_size: Number of samples per batch.
            If unspecified, batch_size will default to 32.
        verbose: Verbosity mode. 0 = silent, 1 = progress bar.
            Controls the verbosity of
            [keras.Model.evaluate](https://keras.io/api/models/model_training_apis/#evaluate-method)
        **kwargs: Any arguments supported by keras.Model.evaluate.

    # Returns
        Scalar test loss (if the model has a single output and no metrics)
        or list of scalars (if the model has multiple outputs and/or
        metrics). The attribute model.metrics_names will give you the
        display labels for the scalar outputs.
    """
    self._check_data_format(x, y)
    adapted_x = self._adapt(x, self.inputs)
    adapted_y = self._adapt(y, self._heads)

    best_pipeline = self.tuner.get_best_pipeline()
    model_x, model_y = best_pipeline.transform((adapted_x, adapted_y))
    best_model = self.tuner.get_best_model()

    return utils.evaluate_with_adaptive_batch_size(
        model=best_model,
        batch_size=batch_size,
        x=model_x,
        y=model_y,
        verbose=verbose,
        **kwargs
    )
def export_model(self):
    """Export the best Keras Model.

    # Returns
        keras.Model instance. The best model found during the search, loaded
        with trained weights.
    """
    best_model = self.tuner.get_best_model()
    return best_model
================================================
FILE: autokeras/auto_model_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import keras_tuner
import numpy as np
import pytest
import autokeras as ak
from autokeras import test_utils
def get_tuner_class(*args, **kwargs):
    """Return a mock tuner class whose tuner has an identity pipeline.

    All arguments are accepted and ignored so that this function can serve
    as the `side_effect` of a patched `get_tuner_class`.
    """
    identity_pipeline = mock.Mock()
    identity_pipeline.transform_x.side_effect = lambda x: x

    mock_tuner = mock.Mock()
    mock_tuner.get_best_pipeline.return_value = identity_pipeline

    return mock.Mock(return_value=mock_tuner)
def test_auto_model_objective_is_kt_objective(tmp_path):
    """The `objective` property exposes a `keras_tuner.Objective`."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )

    assert isinstance(model.objective, keras_tuner.Objective)
def test_auto_model_max_trial_field_as_specified(tmp_path):
    """The `max_trials` property reflects the constructor argument."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
        max_trials=10,
    )

    assert model.max_trials == 10
def test_auto_model_directory_field_as_specified(tmp_path):
    """The `directory` property reflects the constructor argument."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )

    assert model.directory == tmp_path
def test_auto_model_project_name_field_as_specified(tmp_path):
    """The `project_name` property reflects the constructor argument."""
    expected_name = "auto_model"
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
        project_name="auto_model",
    )

    assert model.project_name == expected_name
@mock.patch("autokeras.auto_model.get_tuner_class")
def test_evaluate(tuner_fn, tmp_path):
    """`evaluate` runs on a functional-API model with a mocked tuner."""
    x_train = np.random.rand(100, 32)
    y_train = np.random.rand(100, 1)

    # Functional API: Input -> DenseBlock -> RegressionHead.
    input_node = ak.Input()
    hidden_node = ak.DenseBlock()(input_node)
    output_node = ak.RegressionHead()(hidden_node)

    model = ak.AutoModel(
        input_node, output_node, directory=tmp_path, max_trials=1
    )
    model.fit(x_train, y_train, epochs=1, validation_data=(x_train, y_train))

    # The mocked pipeline must hand back an (x, y) pair for unpacking.
    mock_tuner = tuner_fn.return_value.return_value
    best_pipeline = mock_tuner.get_best_pipeline.return_value
    best_pipeline.transform.return_value = (x_train, y_train)

    model.evaluate(x_train, y_train)

    assert tuner_fn.called
def get_single_io_auto_model(tmp_path):
    """Create an AutoModel with one image input and one regression head."""
    single_io_model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
        max_trials=2,
    )
    return single_io_model
@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_auto_model_predict(tuner_fn, tmp_path):
    """`predict` runs after `fit` on a single-IO model with a mocked tuner."""
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = get_single_io_auto_model(tmp_path)

    model.fit(images, targets, epochs=2, validation_split=0.2)
    model.predict(images)

    assert tuner_fn.called
@mock.patch("autokeras.auto_model.get_tuner_class")
def test_final_fit_concat(tuner_fn, tmp_path):
    """With `validation_split`, the search receives a truthy split."""
    mock_tuner = tuner_fn.return_value.return_value
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = get_single_io_auto_model(tmp_path)

    model.fit(images, targets, epochs=2, validation_split=0.2)

    first_search_call = mock_tuner.search.call_args_list[0]
    assert first_search_call[1]["validation_split"]
@mock.patch("autokeras.auto_model.get_tuner_class")
def test_final_fit_not_concat(tuner_fn, tmp_path):
    """With `validation_data`, the search receives a falsy split."""
    mock_tuner = tuner_fn.return_value.return_value
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = get_single_io_auto_model(tmp_path)

    model.fit(images, targets, epochs=2, validation_data=(images, targets))

    first_search_call = mock_tuner.search.call_args_list[0]
    assert not first_search_call[1]["validation_split"]
@mock.patch("autokeras.auto_model.get_tuner_class")
def test_overwrite(tuner_fn, tmp_path):
    """The tuner class is constructed with a falsy `overwrite` by default."""
    mock_tuner_class = tuner_fn.return_value
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = get_single_io_auto_model(tmp_path)

    model.fit(images, targets, epochs=2, validation_data=(images, targets))

    constructor_call = mock_tuner_class.call_args_list[0]
    assert not constructor_call[1]["overwrite"]
@mock.patch("autokeras.auto_model.get_tuner_class")
def test_export_model(tuner_fn, tmp_path):
    """`export_model` asks the tuner for its best model."""
    mock_tuner = tuner_fn.return_value.return_value
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = get_single_io_auto_model(tmp_path)

    model.fit(images, targets, epochs=2, validation_data=(images, targets))
    model.export_model()

    assert mock_tuner.get_best_model.called
def get_multi_io_auto_model(tmp_path):
    """Create an AutoModel with two image inputs and two regression heads."""
    image_inputs = [ak.ImageInput(), ak.ImageInput()]
    regression_heads = [ak.RegressionHead(), ak.RegressionHead()]
    return ak.AutoModel(
        image_inputs,
        regression_heads,
        directory=tmp_path,
        max_trials=2,
        overwrite=False,
    )
def dataset_error(x, y, validation_data, message, tmp_path):
    """Assert that fitting a multi-IO model raises ValueError with `message`."""
    model = get_multi_io_auto_model(tmp_path)

    with pytest.raises(ValueError) as excinfo:
        model.fit(x, y, epochs=2, validation_data=validation_data)

    raised_text = str(excinfo.value)
    assert message in raised_text
@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_multi_input_predict(tuner_fn, tmp_path):
    """`predict` accepts the two inputs wrapped in an extra tuple."""
    model = get_multi_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))
    paired_x = (features, features)
    paired_y = (targets, targets)

    model.fit(
        x=paired_x,
        y=paired_y,
        epochs=2,
        validation_data=((features, features), (targets, targets)),
    )

    model.predict(x=((features, features),))
@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_multi_input_predict2(tuner_fn, tmp_path):
    """`predict` accepts the two inputs as a flat tuple."""
    model = get_multi_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))
    paired_x = (features, features)
    paired_y = (targets, targets)

    model.fit(
        x=paired_x,
        y=paired_y,
        epochs=2,
        validation_data=((features, features), (targets, targets)),
    )

    model.predict(x=(features, features))
def test_invalid_tuner_name_error(tmp_path):
    """An unknown tuner name is rejected with a ValueError."""
    expected_fragment = "Expected the tuner argument to be one of"

    with pytest.raises(ValueError) as excinfo:
        ak.AutoModel(
            inputs=ak.ImageInput(),
            outputs=ak.RegressionHead(),
            directory=tmp_path,
            tuner="unknown",
        )

    assert expected_fragment in str(excinfo.value)
def test_no_validation_data_nor_split_error(tmp_path):
    """`fit` without validation data and with a zero split is rejected."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )
    expected_fragment = "Either validation_data or a non-zero"

    with pytest.raises(ValueError) as excinfo:
        model.fit(
            x=np.random.rand(100, 32, 32, 3),
            y=np.random.rand(100, 1),
            validation_split=0,
        )

    assert expected_fragment in str(excinfo.value)
@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_predict_tuple_x_and_tuple_y_predict_doesnt_crash(tuner_fn, tmp_path):
    """Single-element tuples are accepted for `x` and `y`."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )
    x = (np.random.rand(100, 32, 32, 3),)
    y = (np.random.rand(100, 1),)

    model.fit(x, y)
    model.predict(x)
def test_fit_with_non_numpy_data_raises_error(tmp_path):
    """Plain Python lists are rejected by `fit` with a ValueError."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )
    # Nested lists rather than np.ndarray instances.
    list_x = [[[1, 2, 3]] * 32] * 32
    list_y = [1]

    with pytest.raises(ValueError, match="Expected x to be a numpy array"):
        model.fit(list_x, list_y)
def test_fit_with_wrong_array_count_raises_error(tmp_path):
    """Passing two input arrays to a single-input model is rejected."""
    model = ak.AutoModel(
        inputs=ak.ImageInput(),
        outputs=ak.RegressionHead(),
        directory=tmp_path,
    )
    # Two arrays although the model declares one input.
    first_images = np.random.rand(10, 32, 32, 3)
    second_images = np.random.rand(10, 32, 32, 3)
    x = (first_images, second_images)
    y = np.random.rand(10, 1)

    with pytest.raises(
        ValueError, match="Expected x to have 1 arrays, but got 2"
    ):
        model.fit(x, y)
================================================
FILE: autokeras/blocks/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
from autokeras.blocks.basic import ConvBlock
from autokeras.blocks.basic import DenseBlock
from autokeras.blocks.basic import EfficientNetBlock
from autokeras.blocks.basic import Embedding
from autokeras.blocks.basic import ResNetBlock
from autokeras.blocks.basic import RNNBlock
from autokeras.blocks.basic import XceptionBlock
from autokeras.blocks.heads import ClassificationHead
from autokeras.blocks.heads import RegressionHead
from autokeras.blocks.preprocessing import ImageAugmentation
from autokeras.blocks.preprocessing import Normalization
from autokeras.blocks.reduction import Flatten
from autokeras.blocks.reduction import Merge
from autokeras.blocks.reduction import SpatialReduction
from autokeras.blocks.reduction import TemporalReduction
from autokeras.blocks.wrapper import GeneralBlock
from autokeras.blocks.wrapper import ImageBlock
from autokeras.blocks.wrapper import StructuredDataBlock
from autokeras.blocks.wrapper import TextBlock
from autokeras.utils import utils
def serialize(obj):
    """Serialize `obj` by delegating to `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized
def deserialize(config, custom_objects=None):
    """Deserialize `config` via `utils.deserialize_keras_object`.

    The names defined in this module are passed as `module_objects`, and
    `custom_objects` is forwarded unchanged.
    """
    # globals() must be evaluated here so it refers to this module's names.
    module_namespace = globals()
    return utils.deserialize_keras_object(
        config,
        module_objects=module_namespace,
        custom_objects=custom_objects,
    )
================================================
FILE: autokeras/blocks/basic.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
from typing import Union
import keras
import tree
from keras import applications
from keras import layers
from keras_tuner.engine import hyperparameters
from autokeras.blocks import reduction
from autokeras.engine import block as block_module
from autokeras.utils import io_utils
from autokeras.utils import layer_utils
from autokeras.utils import utils
# ResNet (v1) constructors from keras.applications, keyed by version name.
RESNET_V1 = dict(
    resnet50=applications.ResNet50,
    resnet101=applications.ResNet101,
    resnet152=applications.ResNet152,
)

# ResNet v2 constructors from keras.applications, keyed by version name.
RESNET_V2 = dict(
    resnet50_v2=applications.ResNet50V2,
    resnet101_v2=applications.ResNet101V2,
    resnet152_v2=applications.ResNet152V2,
)

# EfficientNet constructors from keras.applications, keyed by variant.
EFFICIENT_VERSIONS = dict(
    b0=applications.EfficientNetB0,
    b1=applications.EfficientNetB1,
    b2=applications.EfficientNetB2,
    b3=applications.EfficientNetB3,
    b4=applications.EfficientNetB4,
    b5=applications.EfficientNetB5,
    b6=applications.EfficientNetB6,
    b7=applications.EfficientNetB7,
)

PRETRAINED = "pretrained"
@keras.utils.register_keras_serializable(package="autokeras")
class DenseBlock(block_module.Block):
    """Block for Dense layers.

    # Arguments
        num_layers: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of Dense layers in the block.
            If left unspecified, it will be tuned automatically.
        num_units: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of units in each dense layer.
            If left unspecified, it will be tuned automatically.
        use_batchnorm: Boolean. Whether to use BatchNormalization layers.
            If left unspecified, it will be tuned automatically.
        dropout: Float or keras_tuner.engine.hyperparameters.Choice.
            The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
        num_units: Optional[Union[int, hyperparameters.Choice]] = None,
        use_batchnorm: Optional[bool] = None,
        dropout: Optional[Union[float, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.num_layers = utils.get_hyperparameter(
            num_layers,
            hyperparameters.Choice("num_layers", [1, 2, 3], default=2),
            int,
        )
        self.num_units = utils.get_hyperparameter(
            num_units,
            hyperparameters.Choice(
                "num_units", [16, 32, 64, 128, 256, 512, 1024], default=32
            ),
            int,
        )
        # Kept as given; None means it is decided in `build`.
        self.use_batchnorm = use_batchnorm
        self.dropout = utils.get_hyperparameter(
            dropout,
            hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0),
            float,
        )

    def get_config(self):
        """Return the block configuration, including serialized arguments."""
        config = super().get_config()
        config.update(
            {
                "num_layers": io_utils.serialize_block_arg(self.num_layers),
                "num_units": io_utils.serialize_block_arg(self.num_units),
                "use_batchnorm": self.use_batchnorm,
                "dropout": io_utils.serialize_block_arg(self.dropout),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create a block from a configuration produced by `get_config`.

        The caller's `config` is not modified.
        """
        # Work on a shallow copy: writing the deserialized values back into
        # the caller's dict would make a second call on the same dict feed
        # already-deserialized objects to `deserialize_block_arg`.
        config = dict(config)
        for key in ("num_layers", "num_units", "dropout"):
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Build the stack of Dense layers on top of a single input.

        # Arguments
            hp: HyperParameters instance used to pick the tunable values.
            inputs: The input node or a nested structure holding exactly
                one input node.

        # Returns
            The output node of the last layer added by the block.
        """
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node
        output_node = reduction.Flatten().build(hp, output_node)

        use_batchnorm = self.use_batchnorm
        if use_batchnorm is None:
            use_batchnorm = hp.Boolean("use_batchnorm", default=False)

        for i in range(utils.add_to_hp(self.num_layers, hp)):
            units = utils.add_to_hp(self.num_units, hp, "units_{i}".format(i=i))
            output_node = layers.Dense(units)(output_node)
            if use_batchnorm:
                output_node = layers.BatchNormalization()(output_node)
            output_node = layers.ReLU()(output_node)
            # Dropout is only added for a strictly positive rate.
            if utils.add_to_hp(self.dropout, hp) > 0:
                output_node = layers.Dropout(utils.add_to_hp(self.dropout, hp))(
                    output_node
                )
        return output_node
@keras.utils.register_keras_serializable(package="autokeras")
class RNNBlock(block_module.Block):
    """An RNN Block.

    # Arguments
        return_sequences: Boolean. Whether to return the last output in the
            output sequence, or the full sequence. Defaults to False.
        bidirectional: Boolean or keras_tuner.engine.hyperparameters.Boolean.
            Bidirectional RNN. If left unspecified, it will be
            tuned automatically.
        num_layers: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of layers in RNN. If left unspecified, it will
            be tuned automatically.
        layer_type: String or keras_tuner.engine.hyperparameters.Choice.
            'gru' or 'lstm'. If left unspecified, it will be tuned
            automatically.
    """

    def __init__(
        self,
        return_sequences: bool = False,
        bidirectional: Optional[Union[bool, hyperparameters.Boolean]] = None,
        num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
        layer_type: Optional[Union[str, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences
        self.bidirectional = utils.get_hyperparameter(
            bidirectional,
            hyperparameters.Boolean("bidirectional", default=True),
            bool,
        )
        self.num_layers = utils.get_hyperparameter(
            num_layers,
            hyperparameters.Choice("num_layers", [1, 2, 3], default=2),
            int,
        )
        self.layer_type = utils.get_hyperparameter(
            layer_type,
            hyperparameters.Choice(
                "layer_type", ["gru", "lstm"], default="lstm"
            ),
            str,
        )

    def get_config(self):
        """Return the block configuration, including serialized arguments."""
        config = super().get_config()
        config.update(
            {
                "return_sequences": self.return_sequences,
                "bidirectional": io_utils.serialize_block_arg(
                    self.bidirectional
                ),
                "num_layers": io_utils.serialize_block_arg(self.num_layers),
                "layer_type": io_utils.serialize_block_arg(self.layer_type),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create a block from a configuration produced by `get_config`.

        The caller's `config` is not modified.
        """
        # Work on a shallow copy: writing the deserialized values back into
        # the caller's dict would make a second call on the same dict feed
        # already-deserialized objects to `deserialize_block_arg`.
        config = dict(config)
        for key in ("bidirectional", "num_layers", "layer_type"):
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Build the stack of recurrent layers on top of a single input.

        # Arguments
            hp: HyperParameters instance used to pick the tunable values.
            inputs: The input node or a nested structure holding exactly
                one input node. Its shape must have three dimensions.

        # Returns
            The output node of the last recurrent layer.

        # Raises
            ValueError: If the input tensor does not have three dimensions.
        """
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        shape = list(input_node.shape)
        if len(shape) != 3:
            raise ValueError(
                "Expect the input tensor of RNNBlock to have dimensions of "
                "[batch_size, time_steps, vec_len], "
                "but got {shape}".format(shape=input_node.shape)
            )

        # Each recurrent layer uses the size of the last input axis as its
        # number of units.
        feature_size = shape[-1]
        output_node = input_node

        bidirectional = utils.add_to_hp(self.bidirectional, hp)
        layer_type = utils.add_to_hp(self.layer_type, hp)
        num_layers = utils.add_to_hp(self.num_layers, hp)
        rnn_layers = {"gru": layers.GRU, "lstm": layers.LSTM}
        in_layer = rnn_layers[layer_type]
        for i in range(num_layers):
            # Intermediate layers always return full sequences; only the
            # last layer follows the `return_sequences` setting.
            return_sequences = True
            if i == num_layers - 1:
                return_sequences = self.return_sequences
            if bidirectional:
                output_node = layers.Bidirectional(  # pragma: no cover
                    in_layer(feature_size, return_sequences=return_sequences)
                )(output_node)
            else:
                output_node = in_layer(
                    feature_size, return_sequences=return_sequences
                )(output_node)
        return output_node
@keras.utils.register_keras_serializable(package="autokeras")
class ConvBlock(block_module.Block):
"""Block for vanilla ConvNets.
# Arguments
kernel_size: Int or keras_tuner.engine.hyperparameters.Choice.
The size of the kernel.
If left unspecified, it will be tuned automatically.
num_blocks: Int or keras_tuner.engine.hyperparameters.Choice.
The number of conv blocks, each of which may contain
convolutional, max pooling, dropout, and activation. If left
unspecified, it will be tuned automatically.
num_layers: Int or hyperparameters.Choice.
The number of convolutional layers in each block. If left
unspecified, it will be tuned automatically.
filters: Int or keras_tuner.engine.hyperparameters.Choice. The number of
filters in the convolutional layers. If left unspecified, it will
be tuned automatically.
max_pooling: Boolean. Whether to use max pooling layer in each block. If
left unspecified, it will be tuned automatically.
separable: Boolean. Whether to use separable conv layers.
If left unspecified, it will be tuned automatically.
dropout: Float or keras_tuner.engine.hyperparameters.Choice.
Range between 0 and 1.
The dropout rate after convolutional layers.
If left unspecified, it will be tuned automatically.
"""
def __init__(
self,
kernel_size: Optional[Union[int, hyperparameters.Choice]] = None,
num_blocks: Optional[Union[int, hyperparameters.Choice]] = None,
num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
filters: Optional[Union[int, hyperparameters.Choice]] = None,
max_pooling: Optional[bool] = None,
separable: Optional[bool] = None,
dropout: Optional[Union[float, hyperparameters.Choice]] = None,
**kwargs,
):
super().__init__(**kwargs)
self.kernel_size = utils.get_hyperparameter(
kernel_size,
hyperparameters.Choice("kernel_size", [3, 5, 7], default=3),
int,
)
self.num_blocks = utils.get_hyperparameter(
num_blocks,
hyperparameters.Choice("num_blocks", [1, 2, 3], default=2),
int,
)
self.num_layers = utils.get_hyperparameter(
num_layers,
hyperparameters.Choice("num_layers", [1, 2], default=2),
int,
)
self.filters = utils.get_hyperparameter(
filters,
hyperparameters.Choice(
"filters", [16, 32, 64, 128, 256, 512], default=32
),
int,
)
self.max_pooling = max_pooling
self.separable = separable
self.dropout = utils.get_hyperparameter(
dropout,
hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0),
float,
)
def get_config(self):
    """Return the parent config extended with this block's settings.

    Values that went through `utils.get_hyperparameter` in `__init__` are
    passed through `io_utils.serialize_block_arg`; the two boolean switches
    are stored directly.
    """
    to_serializable = io_utils.serialize_block_arg
    block_settings = {
        "kernel_size": to_serializable(self.kernel_size),
        "num_blocks": to_serializable(self.num_blocks),
        "num_layers": to_serializable(self.num_layers),
        "filters": to_serializable(self.filters),
        "max_pooling": self.max_pooling,
        "separable": self.separable,
        "dropout": to_serializable(self.dropout),
    }
    config = super().get_config()
    config.update(block_settings)
    return config
@classmethod
def from_config(cls, config):
    """Rebuild a block from a dict produced by `get_config`.

    The entries written with `io_utils.serialize_block_arg` are passed
    through `io_utils.deserialize_block_arg` before being handed to the
    constructor.

    # Arguments
        config: Dict. The configuration to restore from. It is not
            modified.

    # Returns
        A new instance of `cls`.
    """
    # Work on a shallow copy so the caller's dict keeps its serialized
    # (plain) values instead of being overwritten with deserialized objects.
    config = dict(config)
    for key in (
        "kernel_size",
        "num_blocks",
        "num_layers",
        "filters",
        "dropout",
    ):
        config[key] = io_utils.deserialize_block_arg(config[key])
    return cls(**config)
def build(self, hp, inputs=None):
    """Stack convolution, optional pooling and optional dropout layers.

    # Arguments
        hp: HyperParameters. Used to resolve every setting that was left
            to be tuned.
        inputs: A single tensor, or a nested structure flattening to
            exactly one tensor.

    # Returns
        The output tensor of the last layer that was added.
    """
    inputs = tree.flatten(inputs)
    utils.validate_num_inputs(inputs, 1)
    input_node = inputs[0]
    output_node = input_node

    kernel_size = utils.add_to_hp(self.kernel_size, hp)

    separable = self.separable
    if separable is None:
        separable = hp.Boolean("separable", default=False)

    # The layer classes are selected from the shape of the input tensor.
    if separable:
        conv = layer_utils.get_sep_conv(
            input_node.shape
        )  # pragma: no cover
    else:
        conv = layer_utils.get_conv(input_node.shape)

    max_pooling = self.max_pooling
    if max_pooling is None:
        max_pooling = hp.Boolean("max_pooling", default=True)
    pool = layer_utils.get_max_pooling(input_node.shape)

    for i in range(utils.add_to_hp(self.num_blocks, hp)):
        for j in range(utils.add_to_hp(self.num_layers, hp)):
            # Every conv layer gets its own "filters_{i}_{j}" entry.
            output_node = conv(
                utils.add_to_hp(
                    self.filters, hp, "filters_{i}_{j}".format(i=i, j=j)
                ),
                kernel_size,
                padding=self._get_padding(kernel_size, output_node),
                activation="relu",
            )(output_node)
        if max_pooling:
            output_node = pool(
                kernel_size - 1,
                padding=self._get_padding(kernel_size - 1, output_node),
            )(output_node)
        # Resolve the dropout rate once per block and reuse the value,
        # rather than querying the hyperparameter a second time.
        dropout = utils.add_to_hp(self.dropout, hp)
        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)
    return output_node
@staticmethod
def _get_padding(kernel_size, output_node):
if all(kernel_size * 2 <= length for length in output_node.shape[1:-1]):
return "valid"
return "same"
@keras.utils.register_keras_serializable(package="autokeras")
class KerasApplicationBlock(block_module.Block):
    """Blocks extending Keras applications.

    # Arguments
        pretrained: Boolean or None. Whether to load ImageNet weights. When
            None, the choice is made through the hyperparameters in
            `build`.
        models: Dict. Maps a version name to a callable that is invoked
            with `weights`, `include_top` and (when not pretrained)
            `input_shape` to create the model.
        min_size: Int. Inputs whose height or width is below this value are
            resized in `build`.
    """

    def __init__(self, pretrained, models, min_size, **kwargs):
        super().__init__(**kwargs)
        self.pretrained = pretrained
        self.models = models
        self.min_size = min_size

    def get_config(self):
        """Return the parent config plus the `pretrained` flag."""
        config = super().get_config()
        config.update({"pretrained": self.pretrained})
        return config

    def build(self, hp, inputs=None):
        """Apply one of `self.models` to the (possibly adapted) input.

        # Arguments
            hp: HyperParameters. Used for the pretrained/trainable flags,
                the model version and the "imagenet_size" switch.
            inputs: A tensor, or a nested structure whose first flattened
                element is the tensor to use. It is indexed up to axis 3.

        # Returns
            The output tensor of the selected model.

        # Raises
            ValueError: If `pretrained` was set to True and the input has a
                channel count other than 1 or 3.
        """
        input_node = tree.flatten(inputs)[0]
        pretrained = self.pretrained
        if input_node.shape[3] not in [1, 3]:
            if self.pretrained:
                raise ValueError(
                    "When pretrained is set to True, expect input to "
                    "have 1 or 3 channels, but got "
                    "{channels}.".format(channels=input_node.shape[3])
                )
            # Other channel counts force training from scratch.
            pretrained = False
        if pretrained is None:
            pretrained = hp.Boolean(PRETRAINED, default=False)
            if pretrained:
                # "trainable" is only registered while PRETRAINED is True.
                with hp.conditional_scope(PRETRAINED, [True]):
                    trainable = hp.Boolean("trainable", default=False)
        elif pretrained:
            trainable = hp.Boolean("trainable", default=False)

        if len(self.models) > 1:
            version = hp.Choice("version", list(self.models.keys()))
        else:
            version = list(self.models.keys())[0]

        min_size = self.min_size
        if hp.Boolean("imagenet_size", default=False):
            min_size = 224
        if input_node.shape[1] < min_size or input_node.shape[2] < min_size:
            # Only enlarge: a side already >= min_size keeps its length.
            input_node = layers.Resizing(
                max(min_size, input_node.shape[1]),
                max(min_size, input_node.shape[2]),
            )(input_node)
        if input_node.shape[3] == 1:
            # Repeat the single channel three times.
            input_node = layers.Concatenate()([input_node] * 3)
        if input_node.shape[3] != 3:
            # Project any other channel count to 3 with a 1x1 convolution.
            input_node = layers.Conv2D(
                filters=3, kernel_size=1, padding="same"
            )(input_node)

        if pretrained:
            model = self.models[version](weights="imagenet", include_top=False)
            model.trainable = trainable
        else:
            model = self.models[version](
                weights=None,
                include_top=False,
                input_shape=input_node.shape[1:],
            )

        return model(input_node)
@keras.utils.register_keras_serializable(package="autokeras")
class ResNetBlock(KerasApplicationBlock):
    """Block for ResNet.

    # Arguments
        version: String. Either 'v1' or 'v2', selecting the family of
            ResNet models to use. If left unspecified, it will be tuned
            automatically.
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        version: Optional[str] = None,
        pretrained: Optional[bool] = None,
        **kwargs,
    ):
        # Reject unsupported versions before choosing the candidate models.
        if version not in (None, "v1", "v2"):
            raise ValueError(
                'Expect version to be "v1", or "v2", but got '
                "{version}.".format(version=version)
            )
        if version == "v1":
            candidates = RESNET_V1
        elif version == "v2":
            candidates = RESNET_V2
        else:
            # No version given: offer the models of both families.
            candidates = {**RESNET_V1, **RESNET_V2}
        super().__init__(
            pretrained=pretrained, models=candidates, min_size=32, **kwargs
        )
        self.version = version

    def get_config(self):
        """Return the parent config plus the `version` argument."""
        config = super().get_config()
        config["version"] = self.version
        return config
@keras.utils.register_keras_serializable(package="autokeras")
class XceptionBlock(KerasApplicationBlock):
    """Block for XceptionNet.

    Wraps `applications.Xception` as the only available model of a
    `KerasApplicationBlock`, with a minimum input size of 71.

    The original Xception architecture is from
    [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357).

    # Arguments
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, pretrained: Optional[bool] = None, **kwargs):
        xception_models = {"xception": applications.Xception}
        super().__init__(
            pretrained=pretrained,
            models=xception_models,
            min_size=71,
            **kwargs,
        )
@keras.utils.register_keras_serializable(package="autokeras")
class EfficientNetBlock(KerasApplicationBlock):
    """Block for EfficientNet.

    # Arguments
        version: String. The value should be one of 'b0', 'b1', ..., 'b7'. The
            type of EfficientNet to use. If left unspecified, it will be tuned
            automatically.
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        version: Optional[str] = None,
        pretrained: Optional[bool] = None,
        **kwargs,
    ):
        if version is None:
            models = EFFICIENT_VERSIONS
        elif version in EFFICIENT_VERSIONS:
            # Restrict the candidates to the single requested version.
            models = {version: EFFICIENT_VERSIONS[version]}
        else:
            raise ValueError(
                "Expect version to be in {expect}, but got "
                "{version}.".format(
                    expect=list(EFFICIENT_VERSIONS.keys()), version=version
                )
            )
        super().__init__(
            pretrained=pretrained,
            models=models,
            min_size=32,
            **kwargs,
        )
        self.version = version

    def get_config(self):
        """Return the parent config plus the `version` argument.

        Without this entry a block restored from its config would fall back
        to `version=None` and offer every EfficientNet version again.
        """
        config = super().get_config()
        config.update({"version": self.version})
        return config
@keras.utils.register_keras_serializable(package="autokeras")
class Embedding(block_module.Block):
    """Word embedding block for sequences.

    The input should be tokenized sequences with the same length, where each
    element of a sequence should be the index of the word.

    # Arguments
        max_features: Int. Size of the vocabulary. Must be set if not using
            TextToIntSequence before this block. Defaults to 20001.
        embedding_dim: Int or keras_tuner.engine.hyperparameters.Choice.
            Output dimension of the embedding layer.
            If left unspecified, it will be tuned automatically.
        dropout: Float or keras_tuner.engine.hyperparameters.Choice.
            The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        max_features: int = 20001,
        embedding_dim: Optional[Union[int, hyperparameters.Choice]] = None,
        dropout: Optional[Union[float, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.max_features = max_features
        self.embedding_dim = utils.get_hyperparameter(
            embedding_dim,
            hyperparameters.Choice(
                "embedding_dim", [32, 64, 128, 256, 512], default=128
            ),
            int,
        )
        self.dropout = utils.get_hyperparameter(
            dropout,
            hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.25),
            float,
        )

    def get_config(self):
        """Return the parent config extended with this block's settings."""
        config = super().get_config()
        config.update(
            {
                "max_features": self.max_features,
                "embedding_dim": io_utils.serialize_block_arg(
                    self.embedding_dim
                ),
                "dropout": io_utils.serialize_block_arg(self.dropout),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a block from a dict produced by `get_config`.

        The passed dict is left untouched; a shallow copy receives the
        deserialized `dropout` and `embedding_dim` values.
        """
        # Copy first so the caller's dict keeps its serialized values.
        config = dict(config)
        config["dropout"] = io_utils.deserialize_block_arg(config["dropout"])
        config["embedding_dim"] = io_utils.deserialize_block_arg(
            config["embedding_dim"]
        )
        return cls(**config)

    def build(self, hp, inputs=None):
        """Embed the input and optionally apply dropout.

        # Arguments
            hp: HyperParameters. Used to resolve `embedding_dim` and
                `dropout`.
            inputs: A tensor, or a nested structure whose first flattened
                element is the tensor to embed.

        # Returns
            The output tensor of the embedding (and dropout) layers.
        """
        input_node = tree.flatten(inputs)[0]
        embedding_dim = utils.add_to_hp(self.embedding_dim, hp)
        layer = layers.Embedding(
            input_dim=self.max_features, output_dim=embedding_dim
        )
        output_node = layer(input_node)
        dropout = utils.add_to_hp(self.dropout, hp)
        # A rate of 0 adds no Dropout layer at all.
        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)
        return output_node
================================================
FILE: autokeras/blocks/basic_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import pytest
import tree
from autokeras import blocks
from autokeras import test_utils
def test_resnet_build_return_tensor():
    """A default ResNetBlock builds exactly one output for a 32x32x3 input."""
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = blocks.ResNetBlock().build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_resnet_v1_return_tensor():
    """ResNetBlock(version="v1") builds exactly one output tensor."""
    resnet_v1 = blocks.ResNetBlock(version="v1")
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = resnet_v1.build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_efficientnet_b0_return_tensor():
    """A non-pretrained "b0" EfficientNetBlock builds one output tensor."""
    efficientnet = blocks.EfficientNetBlock(version="b0", pretrained=False)
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = efficientnet.build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_resnet_pretrained_build_return_tensor():
    """A pretrained ResNetBlock builds one output for a 3-channel input."""
    pretrained_resnet = blocks.ResNetBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = pretrained_resnet.build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_resnet_pretrained_with_one_channel_input():
    """A pretrained ResNetBlock accepts a 28x28 single-channel input."""
    pretrained_resnet = blocks.ResNetBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    gray_image = keras.Input(shape=(28, 28, 1), dtype="float32")

    result = pretrained_resnet.build(hp, gray_image)

    assert len(tree.flatten(result)) == 1
def test_resnet_pretrained_error_with_two_channels():
    """A pretrained ResNetBlock rejects a two-channel input."""
    pretrained_resnet = blocks.ResNetBlock(pretrained=True)

    with pytest.raises(ValueError) as raised:
        hp = keras_tuner.HyperParameters()
        two_channel_image = keras.Input(shape=(224, 224, 2), dtype="float32")
        pretrained_resnet.build(hp, two_channel_image)

    message = str(raised.value)
    assert "When pretrained is set to True" in message
def test_resnet_deserialize_to_resnet():
    """Serializing then deserializing a ResNetBlock yields a ResNetBlock."""
    payload = blocks.serialize(blocks.ResNetBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ResNetBlock)
def test_resnet_get_config_has_all_attributes():
    """Every ResNetBlock.__init__ argument appears as a config key."""
    config = blocks.ResNetBlock().get_config()

    init_args = test_utils.get_func_args(blocks.ResNetBlock.__init__)

    assert init_args.issubset(config.keys())
def test_resnet_wrong_version_error():
    """An unknown ResNet version is rejected at construction time."""
    with pytest.raises(ValueError) as raised:
        blocks.ResNetBlock(version="abc")

    message = str(raised.value)
    assert "Expect version to be" in message
def test_efficientnet_wrong_version_error():
    """An unknown EfficientNet version is rejected at construction time."""
    with pytest.raises(ValueError) as raised:
        blocks.EfficientNetBlock(version="abc")

    message = str(raised.value)
    assert "Expect version to be" in message
def test_xception_build_return_tensor():
    """A default XceptionBlock builds one output for a 2-channel input."""
    hp = keras_tuner.HyperParameters()
    two_channel_image = keras.Input(shape=(32, 32, 2), dtype="float32")

    result = blocks.XceptionBlock().build(hp, two_channel_image)

    assert len(tree.flatten(result)) == 1
def test_xception_pretrained_build_return_tensor():
    """A pretrained XceptionBlock builds one output for a 3-channel input."""
    pretrained_xception = blocks.XceptionBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = pretrained_xception.build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_xception_pretrained_with_one_channel_input():
    """A pretrained XceptionBlock accepts a 224x224 single-channel input."""
    pretrained_xception = blocks.XceptionBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    gray_image = keras.Input(shape=(224, 224, 1), dtype="float32")

    result = pretrained_xception.build(hp, gray_image)

    assert len(tree.flatten(result)) == 1
def test_xception_pretrained_error_with_two_channels():
    """A pretrained XceptionBlock rejects a two-channel input."""
    pretrained_xception = blocks.XceptionBlock(pretrained=True)

    with pytest.raises(ValueError) as raised:
        hp = keras_tuner.HyperParameters()
        two_channel_image = keras.Input(shape=(224, 224, 2), dtype="float32")
        pretrained_xception.build(hp, two_channel_image)

    message = str(raised.value)
    assert "When pretrained is set to True" in message
def test_xception_deserialize_to_xception():
    """Serializing then deserializing an XceptionBlock keeps its type."""
    payload = blocks.serialize(blocks.XceptionBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.XceptionBlock)
def test_xception_get_config_has_all_attributes():
    """Every XceptionBlock.__init__ argument appears as a config key."""
    config = blocks.XceptionBlock().get_config()

    init_args = test_utils.get_func_args(blocks.XceptionBlock.__init__)

    assert init_args.issubset(config.keys())
def test_conv_build_return_tensor():
    """A default ConvBlock builds exactly one output for a 32x32x3 input."""
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = blocks.ConvBlock().build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_conv_with_small_image_size_return_tensor():
    """A default ConvBlock also builds for a 10x10x3 input."""
    hp = keras_tuner.HyperParameters()
    small_image = keras.Input(shape=(10, 10, 3), dtype="float32")

    result = blocks.ConvBlock().build(hp, small_image)

    assert len(tree.flatten(result)) == 1
def test_conv_build_with_dropout_return_tensor():
    """A ConvBlock with dropout=0.5 builds exactly one output tensor."""
    conv_with_dropout = blocks.ConvBlock(dropout=0.5)
    hp = keras_tuner.HyperParameters()
    image = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = conv_with_dropout.build(hp, image)

    assert len(tree.flatten(result)) == 1
def test_conv_deserialize_to_conv():
    """Serializing then deserializing a ConvBlock yields a ConvBlock."""
    payload = blocks.serialize(blocks.ConvBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ConvBlock)
def test_conv_get_config_has_all_attributes():
    """Every ConvBlock.__init__ argument appears as a config key."""
    config = blocks.ConvBlock().get_config()

    init_args = test_utils.get_func_args(blocks.ConvBlock.__init__)

    assert init_args.issubset(config.keys())
def test_rnn_build_return_tensor():
    """A non-bidirectional RNNBlock builds one output for a (32, 10) input."""
    rnn = blocks.RNNBlock(bidirectional=False)
    hp = keras_tuner.HyperParameters()
    sequence = keras.Input(shape=(32, 10), dtype="float32")

    result = rnn.build(hp, sequence)

    assert len(tree.flatten(result)) == 1
def test_rnn_input_shape_one_dim_error():
    """An RNNBlock rejects an input declared with shape (32,)."""
    rnn = blocks.RNNBlock()

    with pytest.raises(ValueError) as raised:
        hp = keras_tuner.HyperParameters()
        flat_input = keras.Input(shape=(32,), dtype="float32")
        rnn.build(hp, flat_input)

    message = str(raised.value)
    assert "Expect the input tensor of RNNBlock" in message
def test_rnn_deserialize_to_rnn():
    """Serializing then deserializing an RNNBlock yields an RNNBlock."""
    payload = blocks.serialize(blocks.RNNBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.RNNBlock)
def test_rnn_get_config_has_all_attributes():
    """Every RNNBlock.__init__ argument appears as a config key."""
    config = blocks.RNNBlock().get_config()

    init_args = test_utils.get_func_args(blocks.RNNBlock.__init__)

    assert init_args.issubset(config.keys())
def test_dense_build_return_tensor():
    """A DenseBlock given a Choice for num_units builds one output tensor."""
    units_space = keras_tuner.engine.hyperparameters.Choice(
        "num_units", [10, 20]
    )
    dense = blocks.DenseBlock(num_units=units_space)
    hp = keras_tuner.HyperParameters()
    features = keras.Input(shape=(32,), dtype="float32")

    result = dense.build(hp, features)

    assert len(tree.flatten(result)) == 1
def test_dense_build_with_dropout_return_tensor():
    """A DenseBlock with dropout=0.5 builds exactly one output tensor."""
    dense_with_dropout = blocks.DenseBlock(dropout=0.5)
    hp = keras_tuner.HyperParameters()
    features = keras.Input(shape=(32,), dtype="float32")

    result = dense_with_dropout.build(hp, features)

    assert len(tree.flatten(result)) == 1
def test_dense_build_with_bn_return_tensor():
    """A DenseBlock with use_batchnorm=True builds one output tensor."""
    dense_with_bn = blocks.DenseBlock(use_batchnorm=True)
    hp = keras_tuner.HyperParameters()
    features = keras.Input(shape=(32,), dtype="float32")

    result = dense_with_bn.build(hp, features)

    assert len(tree.flatten(result)) == 1
def test_dense_deserialize_to_dense():
    """Serializing then deserializing a DenseBlock yields a DenseBlock."""
    payload = blocks.serialize(blocks.DenseBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.DenseBlock)
def test_dense_get_config_has_all_attributes():
    """Every DenseBlock.__init__ argument appears as a config key."""
    config = blocks.DenseBlock().get_config()

    init_args = test_utils.get_func_args(blocks.DenseBlock.__init__)

    assert init_args.issubset(config.keys())
def test_embed_build_return_tensor():
    """A default Embedding block builds one output for a (32,) input."""
    hp = keras_tuner.HyperParameters()
    token_ids = keras.Input(shape=(32,), dtype="float32")

    result = blocks.Embedding().build(hp, token_ids)

    assert len(tree.flatten(result)) == 1
def test_embed_deserialize_to_embed():
    """Serializing then deserializing an Embedding block keeps its type."""
    payload = blocks.serialize(blocks.Embedding())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.Embedding)
def test_embed_get_config_has_all_attributes():
    """Every Embedding.__init__ argument appears as a config key."""
    config = blocks.Embedding().get_config()

    init_args = test_utils.get_func_args(blocks.Embedding.__init__)

    assert init_args.issubset(config.keys())
================================================
FILE: autokeras/blocks/heads.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import keras
import tree
from keras import activations
from keras import layers
from keras import losses
from autokeras import adapters
from autokeras import analysers
from autokeras import hyper_preprocessors as hpps_module
from autokeras import preprocessors
from autokeras.blocks import reduction
from autokeras.engine import head as head_module
from autokeras.utils import types
from autokeras.utils import utils
@keras.utils.register_keras_serializable(package="autokeras")
class ClassificationHead(head_module.Head):
    """Classification Dense layers.

    Sigmoid with binary crossentropy is used for binary and multi-label
    classification; softmax with categorical crossentropy is used for
    multi-class (more than 2) classification. Accuracy is the default
    metric.

    The targets passed to the head have to be np.ndarray. They can be raw
    labels, one-hot encoded if there are more than two classes, or binary
    encoded for binary classification.

    Raw labels are encoded to one column if two classes were found, or
    one-hot encoded if more than two classes were found.

    # Arguments
        num_classes: Int. Defaults to None. If None, it will be inferred from
            the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. Defaults to use `binary_crossentropy` or
            `categorical_crossentropy` based on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        dropout: Float. The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        dropout: Optional[float] = None,
        **kwargs
    ):
        # These are assigned before the parent constructor runs because
        # infer_loss() below reads num_classes and multi_label.
        self.num_classes = num_classes
        self.multi_label = multi_label
        self.dropout = dropout
        metrics = ["accuracy"] if metrics is None else metrics
        loss = self.infer_loss() if loss is None else loss
        super().__init__(loss=loss, metrics=metrics, **kwargs)
        # Filled in later by config_from_analyser().
        self._encoded = None
        self._encoded_for_sigmoid = None
        self._encoded_for_softmax = None
        self._add_one_dimension = False
        self._labels = None

    def infer_loss(self):
        """Pick a loss from `num_classes` and `multi_label`.

        Returns None while `num_classes` is unset (or zero).
        """
        if not self.num_classes:
            return None
        uses_sigmoid = self.num_classes == 2 or self.multi_label
        if uses_sigmoid:
            return losses.BinaryCrossentropy()
        return losses.CategoricalCrossentropy()

    def get_config(self):
        """Return the parent config plus this head's own arguments."""
        config = super().get_config()
        config["num_classes"] = self.num_classes
        config["multi_label"] = self.multi_label
        config["dropout"] = self.dropout
        return config

    def build(self, hp, inputs=None):
        """Add the optional dropout, the dense layer and the activation.

        # Arguments
            hp: HyperParameters. Used for the dropout rate when it was not
                fixed, and passed on to the spatial reduction.
            inputs: A nested structure flattening to exactly one tensor.

        # Returns
            The output tensor, whose last layer is named after this head.
        """
        flat_inputs = tree.flatten(inputs)
        utils.validate_num_inputs(flat_inputs, 1)
        output_node = flat_inputs[0]

        # Reduce the tensor to a vector.
        if len(output_node.shape) > 2:
            output_node = reduction.SpatialReduction().build(hp, output_node)

        dropout = (
            self.dropout
            if self.dropout is not None
            else hp.Choice("dropout", [0.0, 0.25, 0.5], default=0)
        )
        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)

        output_node = layers.Dense(self.shape[-1])(output_node)
        # The final activation follows the loss: sigmoid for binary
        # crossentropy, softmax otherwise.
        if isinstance(self.loss, keras.losses.BinaryCrossentropy):
            activation_layer = layers.Activation(
                activations.sigmoid, name=self.name
            )
        else:
            activation_layer = layers.Softmax(name=self.name)
        return activation_layer(output_node)

    def get_adapter(self):
        return adapters.ClassificationAdapter(name=self.name)

    def get_analyser(self):
        return analysers.ClassificationAnalyser(
            name=self.name, multi_label=self.multi_label
        )

    def config_from_analyser(self, analyser):
        """Copy the analyser's findings onto the head.

        The loss is re-inferred from the analyser's class count and replaces
        the current `loss` value.
        """
        super().config_from_analyser(analyser)
        self.num_classes = analyser.num_classes
        self.loss = self.infer_loss()
        self._encoded = analyser.encoded
        self._encoded_for_sigmoid = analyser.encoded_for_sigmoid
        self._encoded_for_softmax = analyser.encoded_for_softmax
        self._add_one_dimension = len(analyser.shape) == 1
        self._labels = analyser.labels

    def get_hyper_preprocessors(self):
        """Assemble the target preprocessing steps for this head.

        Up to three optional steps (add a dimension, cast to int32, cast to
        string) are followed by exactly one encoder or postprocessor.
        """
        wrap = hpps_module.DefaultHyperPreprocessor
        steps = []

        if self._add_one_dimension:
            steps.append(wrap(preprocessors.AddOneDimension()))

        if self.dtype in ["uint8", "uint16", "uint32", "uint64"]:
            steps.append(wrap(preprocessors.CastToInt32()))

        if not self._encoded and self.dtype != "string":
            steps.append(wrap(preprocessors.CastToString()))

        # Exactly one of the following closes the pipeline.
        if self._encoded_for_sigmoid:
            final_step = preprocessors.SigmoidPostprocessor()
        elif self._encoded_for_softmax:
            final_step = preprocessors.SoftmaxPostprocessor()
        elif self.num_classes == 2:
            final_step = preprocessors.LabelEncoder(self._labels)
        else:
            final_step = preprocessors.OneHotEncoder(self._labels)
        steps.append(wrap(final_step))
        return steps
@keras.utils.register_keras_serializable(package="autokeras")
class RegressionHead(head_module.Head):
    """Regression Dense layers.

    The targets passing to the head would have to be np.ndarray. It can be
    single-column or multi-column. The values should all be numerical.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use `mean_squared_error`.
        metrics: A list of Keras metrics. Defaults to use `mean_squared_error`.
        dropout: Float. The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        dropout: Optional[float] = None,
        **kwargs
    ):
        if metrics is None:
            metrics = ["mean_squared_error"]
        super().__init__(loss=loss, metrics=metrics, **kwargs)
        self.output_dim = output_dim
        self.dropout = dropout
        # Updated by config_from_analyser(). Initialized here so that
        # get_hyper_preprocessors() works before any analyser has run.
        self._add_one_dimension = False

    def get_config(self):
        """Return the parent config plus `output_dim` and `dropout`."""
        config = super().get_config()
        config.update({"output_dim": self.output_dim, "dropout": self.dropout})
        return config

    def build(self, hp, inputs=None):
        """Add the optional dropout, a flatten step and the dense output.

        # Arguments
            hp: HyperParameters. Used for the dropout rate when it was not
                fixed, and passed on to the flatten block.
            inputs: A nested structure flattening to exactly one tensor.

        # Returns
            The output tensor of the dense layer named after this head.
        """
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node

        if self.dropout is not None:
            dropout = self.dropout
        else:
            dropout = hp.Choice("dropout", [0.0, 0.25, 0.5], default=0)

        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)
        output_node = reduction.Flatten().build(hp, output_node)
        output_node = layers.Dense(self.shape[-1], name=self.name)(output_node)
        return output_node

    def config_from_analyser(self, analyser):
        """Copy the analyser's findings onto the head."""
        super().config_from_analyser(analyser)
        # Targets with a one-element shape need an extra dimension.
        self._add_one_dimension = len(analyser.shape) == 1

    def get_adapter(self):
        return adapters.RegressionAdapter(name=self.name)

    def get_analyser(self):
        return analysers.RegressionAnalyser(
            name=self.name, output_dim=self.output_dim
        )

    def get_hyper_preprocessors(self):
        """Return the target preprocessing steps (at most one)."""
        hyper_preprocessors = []
        if self._add_one_dimension:
            hyper_preprocessors.append(  # pragma: no cover
                hpps_module.DefaultHyperPreprocessor(
                    preprocessors.AddOneDimension()
                )
            )
        return hyper_preprocessors
================================================
FILE: autokeras/blocks/heads_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import numpy as np
import tree
from autokeras import hyper_preprocessors
from autokeras import nodes as input_module
from autokeras import preprocessors
from autokeras import test_utils
from autokeras.blocks import heads as head_module
def test_two_classes_infer_binary_crossentropy():
    """Labels with two distinct values lead to a binary crossentropy loss."""
    head = head_module.ClassificationHead(name="a", shape=(1,))
    labels = head.get_adapter().adapt(np.array(["a", "a", "a", "b"]))

    analyser = head.get_analyser()
    analyser.update(labels)
    analyser.finalize()
    head.config_from_analyser(analyser)

    build_hp = keras_tuner.HyperParameters()
    input_node = input_module.Input(shape=(32,)).build_node(
        keras_tuner.HyperParameters()
    )
    head.build(build_hp, input_node)

    assert head.loss.name == "binary_crossentropy"
def test_three_classes_infer_categorical_crossentropy():
    """Labels with three distinct values lead to categorical crossentropy."""
    head = head_module.ClassificationHead(name="a", shape=(1,))
    labels = head.get_adapter().adapt(np.array(["a", "a", "c", "b"]))

    analyser = head.get_analyser()
    analyser.update(labels)
    analyser.finalize()
    head.config_from_analyser(analyser)

    build_hp = keras_tuner.HyperParameters()
    input_node = input_module.Input(shape=(32,)).build_node(
        keras_tuner.HyperParameters()
    )
    head.build(build_hp, input_node)

    assert head.loss.name == "categorical_crossentropy"
def test_multi_label_loss():
    """A multi-label head ends in sigmoid and uses binary crossentropy."""
    multi_label_head = head_module.ClassificationHead(
        name="a", multi_label=True, num_classes=8, shape=(8,)
    )
    features = keras.Input(shape=(5,))

    predictions = multi_label_head.build(
        keras_tuner.HyperParameters(), features
    )
    model = keras.Model(features, predictions)

    last_layer = model.layers[-1]
    assert last_layer.activation.__name__ == "sigmoid"
    assert multi_label_head.loss.name == "binary_crossentropy"
def test_clf_head_get_sigmoid_postprocessor():
    """With sigmoid-encoded targets the first step is a SigmoidPostprocessor."""
    head = head_module.ClassificationHead(name="a", multi_label=True)
    head._encoded = True
    head._encoded_for_sigmoid = True

    first_step = head.get_hyper_preprocessors()[0]

    assert isinstance(
        first_step.preprocessor, preprocessors.SigmoidPostprocessor
    )
def test_clf_head_with_2_clases_get_label_encoder():
    """With two unencoded classes the last step is a LabelEncoder."""
    head = head_module.ClassificationHead(name="a", num_classes=2)
    head._encoded = False
    head._labels = ["a", "b"]

    last_step = head.get_hyper_preprocessors()[-1]

    assert isinstance(last_step.preprocessor, preprocessors.LabelEncoder)
def test_clf_head_build_with_zero_dropout_return_tensor():
    """A ClassificationHead with dropout=0 builds exactly one output."""
    head = head_module.ClassificationHead(dropout=0, shape=(8,))
    hp = keras_tuner.HyperParameters()
    features = keras.Input(shape=(5,), dtype="float32")

    result = head.build(hp, features)

    assert len(tree.flatten(result)) == 1
def test_clf_head_hpps_with_uint8_contain_cast_to_int32():
    """uint8 targets make the head include a CastToInt32 step."""
    labels = test_utils.generate_one_hot_labels(100, 10)
    labels = labels.astype("uint8")
    head = head_module.ClassificationHead(shape=(8,))

    analyser = head.get_analyser()
    for row in labels:
        analyser.update(row)
    analyser.finalize()
    head.config_from_analyser(analyser)

    def casts_to_int32(step):
        return isinstance(
            step, hyper_preprocessors.DefaultHyperPreprocessor
        ) and isinstance(step.preprocessor, preprocessors.CastToInt32)

    assert any(casts_to_int32(step) for step in head.get_hyper_preprocessors())
def test_reg_head_build_with_zero_dropout_return_tensor():
    """A RegressionHead with dropout=0 builds exactly one output."""
    head = head_module.RegressionHead(dropout=0, shape=(8,))
    hp = keras_tuner.HyperParameters()
    features = keras.Input(shape=(5,), dtype="float32")

    result = head.build(hp, features)

    assert len(tree.flatten(result)) == 1
================================================
FILE: autokeras/blocks/preprocessing.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Blocks for data preprocessing.
They are built into keras preprocessing layers and will be part of the Keras
model.
"""
from typing import Optional
from typing import Tuple
from typing import Union
import keras
import tree
from keras import layers
from keras_tuner.engine import hyperparameters
from autokeras.engine import block as block_module
from autokeras.utils import io_utils
from autokeras.utils import utils
@keras.utils.register_keras_serializable(package="autokeras")
class Normalization(block_module.Block):
    """Perform feature-wise normalization on data.

    Refer to Normalization layer in keras preprocessing layers for more
    information.

    # Arguments
        axis: Integer or tuple of integers, the axis or axes that should be
            normalized (typically the features axis). We will normalize each
            element in the specified axis. The default is '-1' (the innermost
            axis); 0 (the batch axis) is not allowed.
    """

    def __init__(self, axis: Union[int, Tuple[int, ...]] = -1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def build(self, hp, inputs=None):
        """Apply a `layers.Normalization` layer to the first input tensor.

        `hp` is not used by this block.
        """
        input_node = tree.flatten(inputs)[0]
        return layers.Normalization(axis=self.axis)(input_node)

    def get_config(self):
        """Return the parent config plus the `axis` argument."""
        config = super().get_config()
        config.update({"axis": self.axis})
        return config
@keras.utils.register_keras_serializable(package="autokeras")
class ImageAugmentation(block_module.Block):
    """Collection of various image augmentation methods.

    Note: `build` currently does not add a zoom layer (see the TODO in
    `build`). `zoom_factor` is still resolved against the hyperparameters
    and serialized, but it has no effect on the built graph yet.

    # Arguments
        translation_factor: A positive float represented as fraction value, or a
            tuple of 2 representing fraction for translation vertically and
            horizontally, or a keras_tuner.engine.hyperparameters.Choice range
            of positive floats. For instance, `translation_factor=0.2` result
            in a random translation factor within 20% of the width and height.
            If left unspecified, it will be tuned automatically.
        vertical_flip: Boolean. Whether to flip the image vertically.
            If left unspecified, it will be tuned automatically.
        horizontal_flip: Boolean. Whether to flip the image horizontally.
            If left unspecified, it will be tuned automatically.
        rotation_factor: Float or keras_tuner.engine.hyperparameters.Choice
            range between [0, 1]. A positive float represented as fraction of
            2pi upper bound for rotating clockwise and counter-clockwise. When
            represented as a single float, lower = upper.
            If left unspecified, it will be tuned automatically.
        zoom_factor: A positive float represented as fraction value, or a tuple
            of 2 representing fraction for zooming vertically and horizontally,
            or a keras_tuner.engine.hyperparameters.Choice range of positive
            floats. For instance, `zoom_factor=0.2` result in a random zoom
            factor from 80% to 120%. If left unspecified, it will be tuned
            automatically.
        contrast_factor: A positive float represented as fraction of value, or a
            tuple of size 2 representing lower and upper bound, or a
            keras_tuner.engine.hyperparameters.Choice range of floats to find
            the optimal value. When represented as a single float,
            lower = upper. The contrast factor will be randomly picked
            between [1.0 - lower, 1.0 + upper]. If left unspecified, it will be
            tuned automatically.
    """

    # Config entries that go through io_utils (de)serialization because they
    # may hold a hyperparameter object instead of a plain value.
    _HYPERPARAMETER_ARGS = (
        "translation_factor",
        "rotation_factor",
        "zoom_factor",
        "contrast_factor",
    )

    def __init__(
        self,
        translation_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        vertical_flip: Optional[bool] = None,
        horizontal_flip: Optional[bool] = None,
        rotation_factor: Optional[Union[float, hyperparameters.Choice]] = None,
        zoom_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        contrast_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.translation_factor = utils.get_hyperparameter(
            translation_factor,
            hyperparameters.Choice("translation_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.rotation_factor = utils.get_hyperparameter(
            rotation_factor,
            hyperparameters.Choice("rotation_factor", [0.0, 0.1]),
            float,
        )
        self.zoom_factor = utils.get_hyperparameter(
            zoom_factor,
            hyperparameters.Choice("zoom_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.contrast_factor = utils.get_hyperparameter(
            contrast_factor,
            hyperparameters.Choice("contrast_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )

    @staticmethod
    def _get_fraction_value(value):
        """Return `value` as a pair, duplicating it when it is not a tuple."""
        if isinstance(value, tuple):
            return value
        return value, value

    def build(self, hp, inputs=None):
        """Chain the enabled augmentation layers onto the first input node.

        # Arguments
            hp: The hyperparameters container used to resolve tunable values.
            inputs: The input node(s); only the first flattened node is used.

        # Returns
            The output node after the enabled augmentation layers.
        """
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        # Translate
        translation_factor = utils.add_to_hp(self.translation_factor, hp)
        if translation_factor not in [0, (0, 0)]:
            height_factor, width_factor = self._get_fraction_value(
                translation_factor
            )
            output_node = layers.RandomTranslation(height_factor, width_factor)(
                output_node
            )

        # Flip
        horizontal_flip = self.horizontal_flip
        if horizontal_flip is None:
            horizontal_flip = hp.Boolean("horizontal_flip", default=True)
        vertical_flip = self.vertical_flip
        if vertical_flip is None:
            vertical_flip = hp.Boolean("vertical_flip", default=True)
        if horizontal_flip and vertical_flip:
            flip_mode = "horizontal_and_vertical"
        elif horizontal_flip:
            flip_mode = "horizontal"
        elif vertical_flip:
            flip_mode = "vertical"
        else:
            flip_mode = ""
        if flip_mode != "":
            output_node = layers.RandomFlip(mode=flip_mode)(output_node)

        # Rotate
        rotation_factor = utils.add_to_hp(self.rotation_factor, hp)
        if rotation_factor != 0:
            output_node = layers.RandomRotation(rotation_factor)(output_node)

        # Zoom
        # The value is still resolved through add_to_hp exactly as before,
        # but no layer is added until RandomZoom is re-enabled.
        utils.add_to_hp(self.zoom_factor, hp)
        # TODO: Add back RandomZoom when it is ready.
        # if zoom_factor not in [0, (0, 0)]:
        #     height_factor, width_factor = self._get_fraction_value(
        #         zoom_factor)
        #     output_node = layers.RandomZoom(
        #         height_factor, width_factor)(output_node)

        # Contrast
        contrast_factor = utils.add_to_hp(self.contrast_factor, hp)
        if contrast_factor not in [0, (0, 0)]:
            output_node = layers.RandomContrast(contrast_factor)(output_node)

        return output_node

    def get_config(self):
        """Return the block configuration with serialized tunable arguments."""
        config = super().get_config()
        config.update(
            {
                "translation_factor": io_utils.serialize_block_arg(
                    self.translation_factor
                ),
                "horizontal_flip": self.horizontal_flip,
                "vertical_flip": self.vertical_flip,
                "rotation_factor": io_utils.serialize_block_arg(
                    self.rotation_factor
                ),
                "zoom_factor": io_utils.serialize_block_arg(self.zoom_factor),
                "contrast_factor": io_utils.serialize_block_arg(
                    self.contrast_factor
                ),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the block from a config produced by `get_config`.

        The incoming dict is copied first so the caller's config is not
        modified while the tunable arguments are deserialized.
        """
        config = dict(config)
        for key in cls._HYPERPARAMETER_ARGS:
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)
================================================
FILE: autokeras/blocks/preprocessing_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import tree
from keras_tuner.engine import hyperparameters
from autokeras import blocks
from autokeras import test_utils
def test_augment_build_return_tensor():
    """Building with an explicit rotation factor yields a single tensor."""
    augmenter = blocks.ImageAugmentation(rotation_factor=0.2)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_build_with_translation_factor_range_return_tensor():
    """A (height, width) translation tuple still yields a single tensor."""
    augmenter = blocks.ImageAugmentation(translation_factor=(0, 0.1))
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_build_with_no_flip_return_tensor():
    """Disabling both flips still yields a single tensor."""
    augmenter = blocks.ImageAugmentation(
        vertical_flip=False, horizontal_flip=False
    )
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_build_with_vflip_only_return_tensor():
    """Enabling only the vertical flip yields a single tensor."""
    augmenter = blocks.ImageAugmentation(
        vertical_flip=True, horizontal_flip=False
    )
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_build_with_zoom_factor_return_tensor():
    """Building with an explicit zoom factor yields a single tensor."""
    augmenter = blocks.ImageAugmentation(zoom_factor=0.1)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_build_with_contrast_factor_return_tensor():
    """Building with an explicit contrast factor yields a single tensor."""
    augmenter = blocks.ImageAugmentation(contrast_factor=0.1)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmenter.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_augment_deserialize_to_augment():
    """A serialize/deserialize round trip keeps the type and arguments."""
    contrast_hp = hyperparameters.Float("contrast_factor", 0.1, 0.5)
    original = blocks.ImageAugmentation(
        zoom_factor=0.1,
        contrast_factor=contrast_hp,
    )
    payload = blocks.serialize(original)

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ImageAugmentation)
    assert restored.zoom_factor == 0.1
    assert isinstance(restored.contrast_factor, hyperparameters.Float)
def test_augment_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    augmenter = blocks.ImageAugmentation()
    config = augmenter.get_config()
    init_args = test_utils.get_func_args(blocks.ImageAugmentation.__init__)
    assert init_args.issubset(config.keys())
================================================
FILE: autokeras/blocks/reduction.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import keras
import tree
from keras import layers
from keras import ops
from autokeras.engine import block as block_module
from autokeras.utils import layer_utils
from autokeras.utils import utils
# Name of the hyperparameter (and config key) selecting the reduction strategy.
REDUCTION_TYPE = "reduction_type"
# The reduction strategies dispatched on by Reduction._build_block.
FLATTEN = "flatten"
GLOBAL_MAX = "global_max"
GLOBAL_AVG = "global_avg"
def shape_compatible(shape1, shape2):
    """Tell whether two shapes have equal rank and differ at most in the last dim.

    # Arguments
        shape1: The first shape (a sequence of dimensions).
        shape2: The second shape (a sequence of dimensions).

    # Returns
        False when the ranks differ, otherwise the result of comparing all
        dimensions except the last one.
    """
    # TODO: If they can be the same after passing through any layer,
    # they are compatible. e.g. (32, 32, 3), (16, 16, 2) are compatible
    return len(shape1) == len(shape2) and shape1[:-1] == shape2[:-1]
@keras.utils.register_keras_serializable(package="autokeras")
class Merge(block_module.Block):
    """Block that combines several input nodes into a single node.

    # Arguments
        merge_type: String. 'add' or 'concatenate'. If left unspecified, it will
            be tuned automatically.
    """

    def __init__(self, merge_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.merge_type = merge_type

    def get_config(self):
        """Return the base config extended with `merge_type`."""
        config = super().get_config()
        config.update({"merge_type": self.merge_type})
        return config

    def build(self, hp, inputs=None):
        """Merge the input nodes by addition or concatenation.

        A single input is returned as-is (as the flattened list). Inputs
        whose shapes are not compatible are flattened first. Addition is only
        considered when all inputs end up with identical shapes.
        """
        nodes = tree.flatten(inputs)
        if len(nodes) == 1:
            return nodes

        incompatible = any(
            not shape_compatible(node.shape, nodes[0].shape) for node in nodes
        )
        if incompatible:
            nodes = [Flatten().build(hp, node) for node in nodes]

        # TODO: Even inputs have different shape[-1], they can still be Add(
        # ) after another layer. Check if the inputs are all of the same
        # shape
        if self._inputs_same_shape(nodes):
            chosen_type = self.merge_type or hp.Choice(
                "merge_type", ["add", "concatenate"], default="add"
            )
            if chosen_type == "add":
                return layers.Add()(nodes)

        return layers.Concatenate()(nodes)

    def _inputs_same_shape(self, inputs):
        """Return True when every input has the same shape as the first."""
        for node in inputs:
            if not (node.shape == inputs[0].shape):
                return False
        return True
@keras.utils.register_keras_serializable(package="autokeras")
class Flatten(block_module.Block):
    """Flatten the input tensor with Keras Flatten layer."""

    def build(self, hp, inputs=None):
        """Flatten the single input node unless its shape has rank <= 2."""
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]
        needs_flatten = len(node.shape) > 2
        return layers.Flatten()(node) if needs_flatten else node
@keras.utils.register_keras_serializable(package="autokeras")
class Reduction(block_module.Block):
    """Base block that reduces a tensor using a selectable strategy.

    Subclasses provide `global_max` and `global_avg`.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it is chosen through the hyperparameters.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.reduction_type = reduction_type

    def get_config(self):
        """Return the base config extended with the reduction type."""
        config = super().get_config()
        config.update({REDUCTION_TYPE: self.reduction_type})
        return config

    def global_max(self, input_node):
        """Global max reduction; must be implemented by subclasses."""
        raise NotImplementedError

    def global_avg(self, input_node):
        """Global average reduction; must be implemented by subclasses."""
        raise NotImplementedError

    def build(self, hp, inputs=None):
        """Reduce the single input node, or return it when rank <= 2."""
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]

        # No need to reduce.
        if len(node.shape) <= 2:
            return node

        # A fixed reduction type bypasses the hyperparameter choice.
        if self.reduction_type is not None:
            return self._build_block(hp, node, self.reduction_type)

        chosen_type = hp.Choice(
            REDUCTION_TYPE, [FLATTEN, GLOBAL_MAX, GLOBAL_AVG]
        )
        with hp.conditional_scope(REDUCTION_TYPE, [chosen_type]):
            return self._build_block(hp, node, chosen_type)

    def _build_block(self, hp, output_node, reduction_type):
        """Apply the named reduction; other names return the node unchanged."""
        if reduction_type == FLATTEN:
            return Flatten().build(hp, output_node)
        if reduction_type == GLOBAL_MAX:
            return self.global_max(output_node)
        if reduction_type == GLOBAL_AVG:
            return self.global_avg(output_node)
        return output_node
@keras.utils.register_keras_serializable(package="autokeras")
class SpatialReduction(Reduction):
    """Reduce the dimension of a spatial tensor, e.g. image, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        """Apply the global max pooling layer picked for the input shape."""
        pooling_cls = layer_utils.get_global_max_pooling(input_node.shape)
        pooling_layer = pooling_cls()
        return pooling_layer(input_node)

    def global_avg(self, input_node):
        """Apply the global average pooling layer picked for the input shape."""
        pooling_cls = layer_utils.get_global_average_pooling(input_node.shape)
        pooling_layer = pooling_cls()
        return pooling_layer(input_node)
@keras.utils.register_keras_serializable(package="autokeras")
class TemporalReduction(Reduction):
    """Reduce the dim of a temporal tensor, e.g. output of RNN, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'. If left
            unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        """Take the maximum over the second-to-last axis."""
        reduced = ops.max(input_node, axis=-2)
        return reduced

    def global_avg(self, input_node):
        """Take the mean over the second-to-last axis."""
        reduced = ops.mean(input_node, axis=-2)
        return reduced
================================================
FILE: autokeras/blocks/reduction_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import tree
from autokeras import blocks
from autokeras import test_utils
def test_merge_build_return_tensor():
    """Merging inputs of different rank yields a single tensor."""
    merger = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    input_nodes = [
        keras.Input(shape=(32,), dtype="float32"),
        keras.Input(shape=(4, 8), dtype="float32"),
    ]

    result = merger.build(hp, input_nodes)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_merge_single_input_return_tensor():
    """Merging a single input yields a single tensor."""
    merger = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    input_node = keras.Input(shape=(32,), dtype="float32")

    result = merger.build(hp, input_node)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_merge_inputs_with_same_shape_return_tensor():
    """Merging two identically shaped inputs yields a single tensor."""
    merger = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    input_nodes = [
        keras.Input(shape=(32,), dtype="float32"),
        keras.Input(shape=(32,), dtype="float32"),
    ]

    result = merger.build(hp, input_nodes)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_merge_deserialize_to_merge():
    """A serialized Merge block deserializes back to a Merge instance."""
    original = blocks.Merge()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.Merge)
def test_merge_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    merger = blocks.Merge()
    config = merger.get_config()
    init_args = test_utils.get_func_args(blocks.Merge.__init__)
    assert init_args.issubset(config.keys())
def test_temporal_build_return_tensor():
    """A tuned temporal reduction yields a single tensor."""
    reducer = blocks.TemporalReduction()
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    result = reducer.build(hp, sequence_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_temporal_global_max_return_tensor():
    """A 'global_max' temporal reduction yields a single tensor."""
    reducer = blocks.TemporalReduction(reduction_type="global_max")
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    result = reducer.build(hp, sequence_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_temporal_global_avg_return_tensor():
    """A 'global_avg' temporal reduction yields a single tensor."""
    reducer = blocks.TemporalReduction(reduction_type="global_avg")
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    result = reducer.build(hp, sequence_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_reduction_2d_tensor_return_input_node():
    """A rank-2 input is returned untouched by the reduction block."""
    reducer = blocks.TemporalReduction()
    input_node = keras.Input(shape=(32,), dtype="float32")
    hp = keras_tuner.HyperParameters()

    result = reducer.build(hp, input_node)

    assert len(tree.flatten(result)) == 1
    assert tree.flatten(result)[0] is input_node
def test_temporal_deserialize_to_temporal():
    """A serialized TemporalReduction deserializes to the same class."""
    original = blocks.TemporalReduction()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.TemporalReduction)
def test_temporal_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    reducer = blocks.TemporalReduction()
    config = reducer.get_config()
    init_args = test_utils.get_func_args(blocks.TemporalReduction.__init__)
    assert init_args.issubset(config.keys())
def test_spatial_build_return_tensor():
    """A tuned spatial reduction of an image input yields a single tensor."""
    reducer = blocks.SpatialReduction()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = reducer.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_spatial_deserialize_to_spatial():
    """A serialized SpatialReduction deserializes to the same class."""
    original = blocks.SpatialReduction()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.SpatialReduction)
def test_spatial_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    reducer = blocks.SpatialReduction()
    config = reducer.get_config()
    init_args = test_utils.get_func_args(blocks.SpatialReduction.__init__)
    assert init_args.issubset(config.keys())
================================================
FILE: autokeras/blocks/wrapper.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import keras
import tree
from autokeras.blocks import basic
from autokeras.blocks import preprocessing
from autokeras.blocks import reduction
from autokeras.engine import block as block_module
from autokeras.utils import utils
# Hyperparameter name / config key for the image backbone choice.
BLOCK_TYPE = "block_type"
# Backbone identifiers dispatched on by ImageBlock._build_block.
RESNET = "resnet"
XCEPTION = "xception"
VANILLA = "vanilla"
EFFICIENT = "efficient"
# Hyperparameter names / config keys for the optional preprocessing steps.
NORMALIZE = "normalize"
AUGMENT = "augment"
# Hyperparameter name for the vocabulary size chosen by TextBlock.
MAX_TOKENS = "max_tokens"
@keras.utils.register_keras_serializable(package="autokeras")
class ImageBlock(block_module.Block):
    """Block for image data.

    The image blocks is a block choosing from ResNetBlock, XceptionBlock,
    ConvBlock and EfficientNetBlock, which is controlled by a hyperparameter,
    'block_type'.

    # Arguments
        block_type: String. 'resnet', 'xception', 'vanilla' or 'efficient'.
            The type of Block to use. If unspecified, it will be tuned
            automatically.
        normalize: Boolean. Whether to channel-wise normalize the images.
            If unspecified, it will be tuned automatically.
        augment: Boolean. Whether to do image augmentation. If unspecified,
            it will be tuned automatically.
    """

    def __init__(
        self,
        block_type: Optional[str] = None,
        normalize: Optional[bool] = None,
        augment: Optional[bool] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.block_type = block_type
        self.normalize = normalize
        self.augment = augment

    def get_config(self):
        """Return the base config extended with this block's arguments."""
        config = super().get_config()
        config.update(
            {
                BLOCK_TYPE: self.block_type,
                NORMALIZE: self.normalize,
                AUGMENT: self.augment,
            }
        )
        return config

    def _build_block(self, hp, output_node, block_type):
        """Build the backbone selected by `block_type` on `output_node`.

        # Raises
            ValueError: If `block_type` is not one of the supported types.
                Without this, an unknown type would return None and that
                None would become the block's output node.
        """
        if block_type == RESNET:
            return basic.ResNetBlock().build(hp, output_node)
        elif block_type == XCEPTION:
            return basic.XceptionBlock().build(hp, output_node)
        elif block_type == VANILLA:
            return basic.ConvBlock().build(hp, output_node)
        elif block_type == EFFICIENT:
            return basic.EfficientNetBlock().build(hp, output_node)
        raise ValueError(
            "Expect block_type to be one of {supported}, "
            "but got {block_type!r}.".format(
                supported=[RESNET, XCEPTION, VANILLA, EFFICIENT],
                block_type=block_type,
            )
        )

    def build(self, hp, inputs=None):
        """Apply optional normalization and augmentation, then the backbone.

        Each of `normalize`, `augment` and `block_type` is taken from the
        constructor when given, and from the hyperparameters otherwise.
        """
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        if self.normalize is None and hp.Boolean(NORMALIZE):
            # Nested hyperparameters only exist when normalization is on.
            with hp.conditional_scope(NORMALIZE, [True]):
                output_node = preprocessing.Normalization().build(
                    hp, output_node
                )
        elif self.normalize:
            output_node = preprocessing.Normalization().build(hp, output_node)

        if self.augment is None and hp.Boolean(AUGMENT):
            # Nested hyperparameters only exist when augmentation is on.
            with hp.conditional_scope(AUGMENT, [True]):
                output_node = preprocessing.ImageAugmentation().build(
                    hp, output_node
                )
        elif self.augment:
            output_node = preprocessing.ImageAugmentation().build(
                hp, output_node
            )

        if self.block_type is None:
            block_type = hp.Choice(
                BLOCK_TYPE, [RESNET, XCEPTION, VANILLA, EFFICIENT]
            )
            with hp.conditional_scope(BLOCK_TYPE, [block_type]):
                output_node = self._build_block(hp, output_node, block_type)
        else:
            output_node = self._build_block(hp, output_node, self.block_type)

        return output_node
@keras.utils.register_keras_serializable(package="autokeras")
class TextBlock(block_module.Block):
    """Block for text data.

    # Arguments
        max_tokens: Int. The maximum size of the vocabulary.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, max_tokens=None, **kwargs):
        super().__init__(**kwargs)
        self.max_tokens = max_tokens

    def build(self, hp, inputs=None):
        """Build the text pipeline on the first flattened input node."""
        first_node = tree.flatten(inputs)[0]
        return self._build_block(hp, first_node)

    def get_config(self):
        """Return the base config extended with `max_tokens`."""
        config = super().get_config()
        config.update({"max_tokens": self.max_tokens})
        return config

    def _build_block(self, hp, output_node):
        """Chain embedding, convolution, spatial reduction and dense blocks."""
        # Use Embedding and dense layers for tokenized text
        vocab_size = self.max_tokens or hp.Choice(
            MAX_TOKENS, [500, 5000, 20000], default=5000
        )
        embedding = basic.Embedding(max_features=vocab_size + 1)
        node = embedding.build(hp, output_node)
        node = basic.ConvBlock().build(hp, node)
        node = reduction.SpatialReduction().build(hp, node)
        node = basic.DenseBlock().build(hp, node)
        return node
@keras.utils.register_keras_serializable(package="autokeras")
class GeneralBlock(block_module.Block):
    """A general neural network block when the input type is unknown.

    When the input type is unknown, the GeneralBlock searches in a large
    space for a good model.

    # Arguments
        name: String.
    """

    def build(self, hp, inputs=None):
        """Flatten the single input node and feed it to a dense block."""
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        flattened = reduction.Flatten().build(hp, nodes[0])
        return basic.DenseBlock().build(hp, flattened)
@keras.utils.register_keras_serializable(package="autokeras")
class StructuredDataBlock(block_module.Block):
    """Block for structured data.

    # Arguments
        normalize: Boolean. Whether to normalize the features.
            If unspecified, it will be tuned automatically.
    """

    def __init__(self, normalize: Optional[bool] = None, **kwargs):
        super().__init__(**kwargs)
        self.normalize = normalize

    def get_config(self):
        """Return the base config extended with `normalize`."""
        config = super().get_config()
        config.update({"normalize": self.normalize})
        return config

    def build(self, hp, inputs=None):
        """Optionally normalize the first input node, then add a dense block."""
        node = tree.flatten(inputs)[0]

        if self.normalize is None:
            # Not fixed by the user: let the hyperparameters decide, and keep
            # the normalization sub-space conditional on that decision.
            if hp.Boolean(NORMALIZE):
                with hp.conditional_scope(NORMALIZE, [True]):
                    node = preprocessing.Normalization().build(hp, node)
        elif self.normalize:
            node = preprocessing.Normalization().build(hp, node)

        return basic.DenseBlock().build(hp, node)
================================================
FILE: autokeras/blocks/wrapper_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import tree
from autokeras import analysers
from autokeras import blocks
from autokeras import test_utils
def test_image_build_return_tensor():
    """A fully tuned ImageBlock yields a single tensor."""
    image_block = blocks.ImageBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_general_build_return_tensor():
    """A GeneralBlock built on an image-shaped input yields a single tensor."""
    general_block = blocks.GeneralBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = general_block.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_image_block_xception_return_tensor():
    """An ImageBlock fixed to 'xception' yields a single tensor."""
    image_block = blocks.ImageBlock(block_type="xception")
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_image_block_normalize_return_tensor():
    """An ImageBlock with normalization forced on yields a single tensor."""
    image_block = blocks.ImageBlock(normalize=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_image_block_augment_return_tensor():
    """An ImageBlock with augmentation forced on yields a single tensor."""
    image_block = blocks.ImageBlock(augment=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_image_deserialize_to_image():
    """A serialized ImageBlock deserializes back to an ImageBlock."""
    original = blocks.ImageBlock()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.ImageBlock)
def test_image_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    image_block = blocks.ImageBlock()
    config = image_block.get_config()
    init_args = test_utils.get_func_args(blocks.ImageBlock.__init__)
    assert init_args.issubset(config.keys())
def test_text_build_return_tensor():
    """A TextBlock built on a string input yields a single tensor."""
    text_block = blocks.TextBlock()
    hp = keras_tuner.HyperParameters()
    text_input = keras.Input(shape=(1,), dtype="string")

    result = text_block.build(hp, text_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_text_deserialize_to_text():
    """A serialized TextBlock deserializes back to a TextBlock."""
    original = blocks.TextBlock()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.TextBlock)
def test_text_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    text_block = blocks.TextBlock()
    config = text_block.get_config()
    init_args = test_utils.get_func_args(blocks.TextBlock.__init__)
    assert init_args.issubset(config.keys())
def test_structured_build_return_tensor():
    """A tuned StructuredDataBlock yields a single tensor."""
    structured_block = blocks.StructuredDataBlock()
    structured_block.column_names = ["0", "1"]
    structured_block.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured_block.build(hp, table_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_structured_block_normalize_return_tensor():
    """A StructuredDataBlock with normalization on yields a single tensor."""
    structured_block = blocks.StructuredDataBlock(normalize=True)
    structured_block.column_names = ["0", "1"]
    structured_block.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured_block.build(hp, table_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_structured_block_search_normalize_return_tensor():
    """Presetting the scoped normalize value still yields a single tensor."""
    structured_block = blocks.StructuredDataBlock(name="a")
    structured_block.column_names = ["0", "1"]
    structured_block.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    # Preset the value under the block-scoped hyperparameter name.
    scoped_name = "a/" + blocks.wrapper.NORMALIZE
    hp.values[scoped_name] = True
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured_block.build(hp, table_input)

    flat_result = tree.flatten(result)
    assert len(flat_result) == 1
def test_structured_deserialize_to_structured():
    """A serialized StructuredDataBlock deserializes to the same class."""
    original = blocks.StructuredDataBlock()
    payload = blocks.serialize(original)
    restored = blocks.deserialize(payload)
    assert isinstance(restored, blocks.StructuredDataBlock)
def test_structured_get_config_has_all_attributes():
    """Every `__init__` argument must appear as a key of `get_config()`."""
    structured_block = blocks.StructuredDataBlock()
    config = structured_block.get_config()
    init_args = test_utils.get_func_args(blocks.StructuredDataBlock.__init__)
    assert init_args.issubset(config.keys())
================================================
FILE: autokeras/conftest.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import keras
import pytest
@pytest.fixture(autouse=True)
def clear_session():
    """Autouse fixture: clear the Keras session before and after each test."""
    # Setup: clear the session before the test body runs.
    keras.backend.clear_session()
    yield None
    # Teardown: clear it again once the test body has finished.
    keras.backend.clear_session()
@pytest.fixture(autouse=True)
def remove_tmp_path(tmp_path):
    """Autouse fixture: delete the test's `tmp_path` directory afterwards."""
    yield None
    # Teardown: remove the whole temporary directory tree.
    shutil.rmtree(tmp_path)
@pytest.fixture(autouse=True)
def disable_traceback_filtering():
    """Autouse fixture: turn off Keras traceback filtering before each test."""
    keras.config.disable_traceback_filtering()
    # No teardown: nothing is done after the test body.
    yield None
================================================
FILE: autokeras/engine/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/engine/adapter.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Adapter:
    """Adapt the input and output format for Keras Model.

    Adapter is used by the input nodes and the heads of the hypermodel. It does
    some type checking for the data and converts it to compatible format.
    """

    def check(self, dataset):
        """Check if the dataset is valid for the input node.

        This base implementation accepts every dataset.

        # Arguments
            dataset: numpy.ndarray. The dataset to be checked.

        # Returns
            Boolean. Whether the dataset is in compatible format.
        """
        return True

    def adapt(self, dataset):
        """Check the dataset and hand it back.

        This base implementation runs `check` and returns the dataset
        object unchanged.

        # Arguments
            dataset: Usually numpy.ndarray. The dataset to be converted.

        # Returns
            numpy.ndarray. The converted dataset.
        """
        self.check(dataset)
        return dataset
================================================
FILE: autokeras/engine/adapter_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.engine import adapter as adapter_module
def test_adapter_check_return_true():
    """The base Adapter accepts any dataset, including None."""
    base_adapter = adapter_module.Adapter()
    is_valid = base_adapter.check(None)
    assert is_valid
================================================
FILE: autokeras/engine/analyser.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
class Analyser:
    """Analyze the dataset for useful information.

    Analyser is used by the input nodes and the heads of the hypermodel. It
    analyzes the dataset to get useful information, e.g., the shape of the
    data, the data type of the dataset. The information will be used by the
    input nodes and heads to construct the data pipeline and to build the Keras
    Model.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Shape is a list of integers
        self.shape = None
        self.dtype = None
        self.num_samples = 0
        self.batch_size = None

    def update(self, data):
        """Update the statistics with a batch of data.

        `dtype`, `shape` and `batch_size` are recorded from the first call
        only; `num_samples` accumulates the first dimension of every call.

        # Arguments
            data: np.ndarray. The data to record statistics from.
        """
        leading_dim = data.shape[0]

        if self.dtype is None:
            # Unicode and byte strings are both reported as "string".
            is_text = any(
                np.issubdtype(data.dtype, text_type)
                for text_type in (np.str_, np.bytes_)
            )
            self.dtype = "string" if is_text else str(data.dtype)

        if self.shape is None:
            self.shape = list(data.shape)

        if self.batch_size is None:
            self.batch_size = leading_dim

        self.num_samples += leading_dim

    def finalize(self):
        """Process recorded information after all updates."""
        raise NotImplementedError
================================================
FILE: autokeras/engine/analyser_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest
from autokeras.engine.analyser import Analyser
def test_analyser_update_unicode_string_dtype():
    """A unicode array is recorded with dtype "string"."""
    words = np.array(["hello", "world"], dtype="U10")
    analyser = Analyser()

    analyser.update(words)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("string", [2], 2, 2)
def test_analyser_update_byte_string_dtype():
    """A byte-string array is recorded with dtype "string"."""
    raw_words = np.array([b"hello", b"world"], dtype="S10")
    analyser = Analyser()

    analyser.update(raw_words)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("string", [2], 2, 2)
def test_analyser_update_numeric_dtype():
    """An int32 array is recorded with dtype "int32"."""
    numbers = np.array([1, 2, 3], dtype=np.int32)
    analyser = Analyser()

    analyser.update(numbers)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("int32", [3], 3, 3)
def test_analyser_update_float_dtype():
    """A float64 array is recorded with dtype "float64"."""
    numbers = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    analyser = Analyser()

    analyser.update(numbers)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("float64", [3], 3, 3)
def test_analyser_finalize_not_implemented():
    """`finalize()` on the base Analyser raises NotImplementedError."""
    base_analyser = Analyser()
    with pytest.raises(NotImplementedError):
        base_analyser.finalize()
================================================
FILE: autokeras/engine/block.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tree
from autokeras.engine import named_hypermodel
from autokeras.engine import node as node_module
class Block(named_hypermodel.NamedHyperModel):
    """Base class of every Block.

    Blocks are connected to one another to form the search space of an
    AutoModel. Notably, many `__init__` arguments default to a tunable
    variable when the user does not specify them.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._num_output_node = 1
        self.inputs = None
        self.outputs = None

    def _build_wrapper(self, hp, *args, **kwargs):
        # Run the parent's wrapper inside a name scope named after the block.
        scope = hp.name_scope(self.name)
        with scope:
            return super()._build_wrapper(hp, *args, **kwargs)

    def __call__(self, inputs):
        """Functional API: connect this block to its input node(s).

        # Arguments
            inputs: A list of input node(s) or a single input node for the
                block.

        # Returns
            list: A list of output node(s) of the Block.

        # Raises
            TypeError: If any flattened input is not a Node.
        """
        self.inputs = tree.flatten(inputs)
        for candidate in self.inputs:
            if isinstance(candidate, node_module.Node):
                candidate.add_out_block(self)
                continue
            raise TypeError(
                "Expect the inputs to block {name} to be "
                "a Node, but got {type}.".format(
                    name=self.name, type=type(candidate)
                )
            )

        # Create one fresh output node per declared output.
        self.outputs = []
        remaining = self._num_output_node
        while remaining > 0:
            fresh_node = node_module.Node()
            fresh_node.add_in_block(self)
            self.outputs.append(fresh_node)
            remaining -= 1
        return self.outputs

    def build(self, hp, inputs=None):
        """Build the Block into a real Keras Model.

        Subclasses should override this function and return the output node.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A list of input node(s).

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
================================================
FILE: autokeras/engine/block_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from autokeras.engine import block as block_module
def test_block_call_raise_inputs_type_error():
block = block_module.Block()
with pytest.raises(TypeError) as info:
block(None)
assert "Expect the inputs to block" in str(info.value)
================================================
FILE: autokeras/engine/head.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import keras
from autokeras.engine import io_hypermodel
from autokeras.utils import types
def serialize_metrics(metrics):
    """Convert a sequence of metrics into a serializable list.

    A string metric is wrapped in a one-element list; any other metric is
    passed to `keras.metrics.serialize`.
    """
    return [
        [entry] if isinstance(entry, str) else keras.metrics.serialize(entry)
        for entry in metrics
    ]
def deserialize_metrics(metrics):
    """Invert `serialize_metrics`.

    A list entry yields its first element; any other entry is passed to
    `keras.metrics.deserialize`.
    """
    return [
        entry[0] if isinstance(entry, list) else keras.metrics.deserialize(entry)
        for entry in metrics
    ]
def serialize_loss(loss):
    """Convert a loss into a serializable form.

    A string loss is wrapped in a one-element list; anything else is passed to
    `keras.losses.serialize`.
    """
    if not isinstance(loss, str):
        return keras.losses.serialize(loss)
    return [loss]
def deserialize_loss(loss):
    """Invert `serialize_loss`.

    A list yields its first element; anything else is passed to
    `keras.losses.deserialize`.
    """
    if not isinstance(loss, list):
        return keras.losses.deserialize(loss)
    return loss[0]
class Head(io_hypermodel.IOHyperModel):
    """Base class for the heads, e.g. classification, regression.

    # Arguments
        loss: A Keras loss function. Defaults to None. If None, the loss will be
            inferred from the AutoModel.
        metrics: A list of Keras metrics. Defaults to None. If None, the metrics
            will be inferred from the AutoModel. None is stored as an empty
            list.
    """

    def __init__(
        self,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.loss = loss
        # A fresh list per instance, so heads never share a metrics list.
        if metrics is None:
            metrics = []
        self.metrics = metrics

    def get_config(self):
        """Return the parent's config extended with serialized loss/metrics."""
        config = super().get_config()
        config.update(
            {
                "loss": serialize_loss(self.loss),
                "metrics": serialize_metrics(self.metrics),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Build a head from a config produced by `get_config`.

        # Arguments
            config: Dict. Must contain the "loss" and "metrics" entries in
                their serialized form. The dict is not modified.

        # Returns
            Whatever the parent `from_config` returns for the decoded config.
        """
        # Work on a shallow copy: overwriting entries in the caller's dict
        # would leave it holding deserialized values, so passing the same dict
        # to from_config again would deserialize them a second time.
        config = dict(config)
        config["loss"] = deserialize_loss(config["loss"])
        config["metrics"] = deserialize_metrics(config["metrics"])
        return super().from_config(config)

    def build(self, hp, inputs=None):
        """Build the head; must be overridden by subclasses.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
================================================
FILE: autokeras/engine/head_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.engine import head as head_module
def test_init_head_none_metrics():
assert isinstance(head_module.Head().metrics, list)
================================================
FILE: autokeras/engine/hyper_preprocessor.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.engine import named_hypermodel
class HyperPreprocessor(named_hypermodel.NamedHyperModel):
    """Search space of an input data preprocessor.

    This class defines the search space for a Preprocessor.
    """

    def build(self, hp, dataset):
        """Build the input preprocessor.

        # Arguments
            hp: `HyperParameters` instance. The hyperparameters for building
                a Preprocessor.
            dataset: np.ndarray. The dataset to be preprocessed.

        # Returns
            An instance of Preprocessor.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
================================================
FILE: autokeras/engine/io_hypermodel.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.engine import block as block_module
class IOHyperModel(block_module.Block):
    """A mixin class connecting the input nodes and heads with the adapters.

    Input nodes and heads extend this class. The AutoModel calls these
    functions to get the corresponding analysers and adapters and to pass the
    learned information back to the input nodes and heads.
    """

    def __init__(self, shape=None, **kwargs):
        super().__init__(**kwargs)
        self.shape = shape
        # Populated later by config_from_analyser().
        self.data_shape = None
        self.dtype = None
        self.batch_size = None
        self.num_samples = None

    def get_analyser(self):
        """Get the corresponding Analyser.

        # Returns
            An instance of a subclass of autokeras.engine.Analyser.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_adapter(self):
        """Get the corresponding Adapter.

        # Returns
            An instance of a subclass of autokeras.engine.Adapter.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def config_from_analyser(self, analyser):
        """Copy the information learned on the dataset from the Analyser.

        # Arguments
            analyser: The analyser to read from. Its `shape`, `dtype`,
                `batch_size` and `num_samples` attributes are copied onto this
                object (`shape` is stored as `data_shape`).
        """
        self.data_shape = analyser.shape
        self.dtype = analyser.dtype
        self.batch_size = analyser.batch_size
        self.num_samples = analyser.num_samples

    def get_hyper_preprocessors(self):
        """Construct a list of HyperPreprocessors based on learned information.

        # Returns
            A list of HyperPreprocessors for the corresponding data.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_config(self):
        """Return the parent's config with the "shape" entry added."""
        config = super().get_config()
        config.update(shape=self.shape)
        return config
================================================
FILE: autokeras/engine/named_hypermodel.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
from autokeras.engine import serializable
from autokeras.utils import utils
class NamedHyperModel(keras_tuner.HyperModel, serializable.Serializable):
"""A KerasTuner HyperModel that is also Serializable and is given a name.
# Arguments
name: String. The name of the HyperModel. If unspecified, it will be set
automatically with the class name.
"""
def __init__(self, name: str = None, **kwargs):
# Any falsy name (None or an empty string) triggers automatic naming.
if not name:
# The generated name is the class name, an underscore, and the id that
# keras.backend.get_uid returns for that class name.
prefix = self.__class__.__name__
name = prefix + "_" + str(keras.backend.get_uid(prefix))
# Normalized by utils.to_snake_case, a helper defined elsewhere in the
# project.
name = utils.to_snake_case(name)
super().__init__(name=name, **kwargs)
def get_config(self):
"""Get the configuration of the hypermodel.
# Returns
A dictionary holding the `name` and `tunable` attributes of this object.
"""
# NOTE(review): `tunable` is not assigned in this class; presumably it is set
# by keras_tuner.HyperModel.__init__ - confirm.
return {"name": self.name, "tunable": self.tunable}
================================================
FILE: autokeras/engine/node.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Node(object):
    """A node of the network; nodes connect the blocks to one another."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Blocks feeding this node, and blocks consuming it.
        self.in_blocks, self.out_blocks = [], []

    def add_in_block(self, hypermodel):
        """Append `hypermodel` to `in_blocks`."""
        self.in_blocks += [hypermodel]

    def add_out_block(self, hypermodel):
        """Append `hypermodel` to `out_blocks`."""
        self.out_blocks += [hypermodel]
================================================
FILE: autokeras/engine/node_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/engine/preprocessor.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.engine import serializable
class Preprocessor(serializable.Serializable):
    """A preprocessor for np.ndarray.

    A preprocessor transforms the dataset (numpy.ndarray).
    """

    def fit(self, dataset):
        """Fit the preprocessor with the dataset.

        The base implementation does nothing.

        # Arguments
            dataset: an instance of `np.ndarray`.
        """
        # TODO: may need to change to a streaming way of fit to reduce the
        # number of iterations through the dataset for speed. Need to be
        # decided when we have more use cases for this fit.
        return None

    def transform(self, dataset):
        """Transform the dataset with the preprocessor.

        # Arguments
            dataset: an instance of `np.ndarray`.

        # Returns
            The transformed dataset.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_config(self):
        """Return an empty config dict."""
        return dict()
class TargetPreprocessor(Preprocessor):
    """Preprocessor for target data."""

    def postprocess(self, dataset):
        """Postprocess the output of the Keras model.

        # Arguments
            dataset: numpy.ndarray. The corresponding output of the model.

        # Returns
            numpy.ndarray. The postprocessed data.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
================================================
FILE: autokeras/engine/serializable.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Serializable(object):
    """Serializable from and to JSON with same mechanism as Keras Layer."""

    def get_config(self):
        """Return the current config of this object.

        # Returns
            Dictionary.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    def from_config(cls, config):
        """Build an instance from the config of this object.

        The config entries are passed to the constructor as keyword arguments.

        # Arguments
            config: Dict. The config of the object.

        # Returns
            A new instance of `cls`.
        """
        instance = cls(**config)
        return instance
================================================
FILE: autokeras/engine/tuner.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import copy
import os
import keras
import keras_tuner
import numpy as np
import tree
from keras import callbacks as callbacks_module
from autokeras import keras_layers
from autokeras import pipeline as pipeline_module
from autokeras.utils import data_utils
from autokeras.utils import utils
class AutoTuner(keras_tuner.engine.tuner.Tuner):
"""A Tuner class based on KerasTuner for AutoKeras.
Different from KerasTuner's Tuner class, AutoTuner not only tunes the
Hypermodel, which can be directly built into a Keras model, but also the
preprocessors. Therefore, a HyperGraph stores the overall search space
containing both the Preprocessors and Hypermodel. For every trial, the
HyperGraph builds the PreprocessGraph and KerasGraph with the provided
HyperParameters.
The AutoTuner uses EarlyStopping for acceleration during the search and
fully trains the model with full epochs and with both training and
validation data. The fully trained model is the best model to be used by
AutoModel.
# Arguments
oracle: keras_tuner Oracle.
hypermodel: keras_tuner HyperModel.
**kwargs: The args supported by KerasTuner.
"""
def __init__(self, oracle, hypermodel, **kwargs):
# Initialize before super() for reload to work.
# NOTE(review): presumably the parent constructor may call set_state() when
# reloading, which assigns self._finished - confirm.
self._finished = False
super().__init__(oracle, hypermodel, **kwargs)
# Save the HyperModel under "<project_dir>/graph".
self.hypermodel.save(os.path.join(self.project_dir, "graph"))
# Read by _prepare_model_build(); it is not assigned anywhere else in this
# class, so it has to be set from outside before a search.
self.hyper_pipeline = None
def _populate_initial_space(self):
# Override the function to prevent building the model during
# initialization.
return
def get_best_model(self):
    """Return the first model given by `get_best_models()`."""
    best_models = self.get_best_models()
    return best_models[0]
def get_best_pipeline(self):
    """Load the pipeline stored at `best_pipeline_path`."""
    saved_at = self.best_pipeline_path
    return pipeline_module.load_pipeline(saved_at)
def _pipeline_path(self, trial_id):
return os.path.join(self.get_trial_dir(trial_id), "pipeline")
def _prepare_model_build(self, hp, **kwargs):
    """Prepare for building the Keras model.

    Builds the Pipeline from the HyperPipeline, fits it on the training data,
    transforms the data, and sets the input and output shapes of the
    HyperModel from the transformed data.

    # Arguments
        hp: The hyperparameters used to build the pipeline.
        **kwargs: Must contain "x" and "y"; "validation_data" is optional.

    # Returns
        A tuple `(pipeline, (x, y), validation_data)` holding the fitted
        pipeline, the transformed training data, and the transformed
        validation data (None when "validation_data" was not given).
    """
    training_pair = (kwargs["x"], kwargs["y"])
    pipeline = self.hyper_pipeline.build(hp, training_pair)
    pipeline.fit(training_pair)
    x, y = pipeline.transform(training_pair)
    self.hypermodel.set_io_shapes(data_utils.dataset_shape((x, y)))

    validation_data = None
    if "validation_data" in kwargs:
        validation_data = pipeline.transform(kwargs["validation_data"])
    return pipeline, (x, y), validation_data
def _build_and_fit_model(self, trial, *args, **kwargs):
    """Build the model of one trial, save its pipeline, and fit the model.

    # Arguments
        trial: The trial whose `hyperparameters` and `trial_id` are used.
        *args: Unused.
        **kwargs: Fit arguments. "x", "y" and "validation_data" are replaced
            with their pipeline-transformed versions before fitting.

    # Returns
        The history returned by `utils.fit_with_adaptive_batch_size`.
    """
    trial_hp = trial.hyperparameters
    model = self._try_build(trial_hp)

    pipeline, (x, y), validation_data = self._prepare_model_build(
        trial_hp, **kwargs
    )
    kwargs["x"] = x
    kwargs["y"] = y
    kwargs["validation_data"] = validation_data
    pipeline.save(self._pipeline_path(trial.trial_id))

    keras.src.backend.compute_output_spec(model, kwargs["x"])
    self.adapt(model, kwargs["x"])

    fit_result = utils.fit_with_adaptive_batch_size(model, **kwargs)
    _, history = fit_result
    return history
@staticmethod
def adapt(model, dataset):
"""Adapt the preprocessing layers in the model.
# Arguments
model: The Keras model to inspect; `model.layers` and `model.input` are read.
dataset: A nested structure of data. It is flattened, and its i-th element
is used as the data for the i-th flattened model input.
# Returns
The same `model` object that was passed in.
"""
# Currently, only support using the original dataset to adapt all the
# preprocessing layers before the first non-preprocessing layer.
# TODO: Use PreprocessingStage for preprocessing layers adapt.
# TODO: Use Keras Tuner for preprocessing layers adapt.
x = tree.flatten(dataset)
# Helper: find the adaptable layers of `model` that consume `tensor`.
def get_output_layers(tensor):
output_layers = []
# Only the first flattened element of `tensor` is compared.
tensor = tree.flatten(tensor)[0]
for layer in model.layers:
if isinstance(layer, keras.layers.InputLayer):
continue
# A layer is matched on its first flattened input only.
input_node = tree.flatten(layer.input)[0]
if input_node is tensor:
# Keep layers that are PreprocessingLayer instances or that expose an
# `adapt` attribute.
if isinstance(
layer,
keras_layers.PreprocessingLayer,
) or hasattr(layer, "adapt"):
output_layers.append(layer)
return output_layers
# FIFO queue of (layer, input data for that layer) pairs.
dq = collections.deque()
for index, input_node in enumerate(tree.flatten(model.input)):
in_x = x[index]
for layer in get_output_layers(input_node):
dq.append((layer, in_x))
while len(dq):
layer, in_x = dq.popleft()
# Adapt the layer on its input, then compute its output so that the
# adaptable layers consuming that output can be adapted on it in turn.
layer.adapt(in_x)
out_x = layer(in_x)
for next_layer in get_output_layers(layer.output):
dq.append((next_layer, out_x))
return model
def search(
self,
epochs=None,
callbacks=None,
validation_split=0,
verbose=1,
**fit_kwargs
):
"""Search for the best HyperParameters.
If there is not early-stopping in the callbacks, the early-stopping
callback is injected to accelerate the search process. At the end of the
search, the best model will be fully trained with the specified number
of epochs.
# Arguments
epochs: The number of epochs for each trial. Defaults to None, in which
case 1000 is used for the search and the final fit uses the value of
`_get_best_trial_epochs()`.
callbacks: A list of callback functions. Defaults to None.
validation_split: Float. When greater than 0, the validation data is
concatenated to the training data for the final fit.
verbose: Forwarded to the parent search and to the final fit.
**fit_kwargs: Forwarded to the parent search. "x" and "y" are read, and
"validation_data" is read when `validation_split` is greater than 0.
# Returns
The history of the final fit, or None when no final fit was run or when
the search had already finished.
"""
# A finished search is never repeated.
if self._finished:
return
if callbacks is None:
callbacks = []
self.hypermodel.set_fit_args(validation_split, epochs=epochs)
# Insert early-stopping for adaptive number of epochs.
# NOTE(review): this appends to the `callbacks` list itself, so a list
# passed in by the caller is modified.
epochs_provided = True
if epochs is None:
epochs_provided = False
epochs = 1000
if not utils.contain_instance(
callbacks, callbacks_module.EarlyStopping
):
callbacks.append(
callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
)
# Insert early-stopping for acceleration.
# NOTE(review): `_deepcopy_callbacks` is not defined in this class;
# presumably inherited from the KerasTuner base class - confirm.
early_stopping_inserted = False
new_callbacks = self._deepcopy_callbacks(callbacks)
if not utils.contain_instance(
callbacks, callbacks_module.EarlyStopping
):
early_stopping_inserted = True
new_callbacks.append(
callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
)
# Populate initial search space.
hp = self.oracle.get_space()
self._prepare_model_build(hp, **fit_kwargs)
self._try_build(hp)
self.oracle.update_space(hp)
super().search(
epochs=epochs,
callbacks=new_callbacks,
verbose=verbose,
**fit_kwargs
)
# A final fit is run when validation data can be merged into the training
# data, or when the early-stopping callback was inserted by this method.
if validation_split > 0 or early_stopping_inserted:
# Shallow copy, so the keys of fit_kwargs itself are left untouched.
copied_fit_kwargs = copy.copy(fit_kwargs)
# The final fit runs without any early-stopping callback.
copied_fit_kwargs["callbacks"] = self._remove_early_stopping(
callbacks
)
# Decide the number of epochs.
copied_fit_kwargs["epochs"] = epochs
if not epochs_provided:
copied_fit_kwargs["epochs"] = self._get_best_trial_epochs()
# Concatenate training and validation data.
if validation_split > 0:
x, y = copied_fit_kwargs["x"], copied_fit_kwargs["y"]
x_val, y_val = fit_kwargs["validation_data"]
# Matching leaves of the nested structures are joined along axis 0.
copied_fit_kwargs["x"] = tree.map_structure(
lambda train, val: np.concatenate([train, val], axis=0),
x,
x_val,
)
copied_fit_kwargs["y"] = tree.map_structure(
lambda train, val: np.concatenate([train, val], axis=0),
y,
y_val,
)
copied_fit_kwargs.pop("validation_data")
self.hypermodel.set_fit_args(0, epochs=copied_fit_kwargs["epochs"])
copied_fit_kwargs["verbose"] = verbose
pipeline, model, history = self.final_fit(**copied_fit_kwargs)
else:
# TODO: Add return history functionality in Keras Tuner
model = self.get_best_model()
history = None
pipeline = pipeline_module.load_pipeline(
self._pipeline_path(self.oracle.get_best_trials(1)[0].trial_id)
)
# Persist the chosen model and pipeline, then mark the search as done.
model.save(self.best_model_path)
pipeline.save(self.best_pipeline_path)
self._finished = True
return history
def get_state(self):
    """Return the parent's state with the "finished" flag added."""
    state = super().get_state()
    state.update(finished=self._finished)
    return state
def set_state(self, state):
    """Restore the parent's state, then the "finished" flag.

    A missing "finished" entry leaves `_finished` as None.
    """
    super().set_state(state)
    finished_flag = state.get("finished")
    self._finished = finished_flag
@staticmethod
def _remove_early_stopping(callbacks):
return [
copy.deepcopy(callbacks)
for callback in callbacks
if not isinstance(callback, callbacks_module.EarlyStopping)
]
def _get_best_trial_epochs(self):
best_trial = self.oracle.get_best_trials(1)[0]
# steps counts from 0, so epochs = step + 1.
return self.oracle.get_trial(best_trial.trial_id).best_step + 1
def _build_best_model(self):
best_trial = self.oracle.get_best_trials(1)[0]
best_hp = best_trial.hyperparameters
return self._try_build(best_hp)
def final_fit(self, **kwargs):
    """Build the best model and fit it on the given data.

    # Arguments
        **kwargs: Fit arguments. "x", "y" and "validation_data" are replaced
            with their pipeline-transformed versions before fitting.

    # Returns
        A tuple `(pipeline, model, history)`.
    """
    best_hp = self.oracle.get_best_trials(1)[0].hyperparameters
    pipeline, (x, y), validation_data = self._prepare_model_build(
        best_hp, **kwargs
    )
    kwargs["x"] = x
    kwargs["y"] = y
    kwargs["validation_data"] = validation_data

    model = self._build_best_model()
    self.adapt(model, kwargs["x"])
    fit_result = utils.fit_with_adaptive_batch_size(model, **kwargs)
    model, history = fit_result
    return pipeline, model, history
@property
def best_model_path(self):
    """Path of the "best_model.keras" file inside the project directory."""
    filename = "best_model.keras"
    return os.path.join(self.project_dir, filename)
@property
def best_pipeline_path(self):
    """Path of the "best_pipeline" entry inside the project directory."""
    entry_name = "best_pipeline"
    return os.path.join(self.project_dir, entry_name)
@property
def objective(self):
    """The `objective` attribute of the oracle."""
    oracle = self.oracle
    return oracle.objective
@property
def max_trials(self):
    """The `max_trials` attribute of the oracle."""
    oracle = self.oracle
    return oracle.max_trials
================================================
FILE: autokeras/engine/tuner_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import keras
import numpy as np
import tree
import autokeras as ak
from autokeras import test_utils
from autokeras.tuners import greedy
def called_with_early_stopping(func):
    """Tell whether the first call of the mock `func` got an EarlyStopping.

    Looks at the "callbacks" keyword argument of the first recorded call.
    """
    first_call_kwargs = func.call_args_list[0][1]
    for callback in first_call_kwargs["callbacks"]:
        if isinstance(callback, keras.callbacks.EarlyStopping):
            return True
    return False
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_final_fit_with_specified_epochs(_, final_fit, super_search, tmp_path):
    """The epochs given to search() are passed on to final_fit()."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    tuner.search(x=None, epochs=10, validation_data=None)

    final_fit_kwargs = final_fit.call_args_list[0][1]
    assert final_fit_kwargs["epochs"] == 10
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_tuner_call_super_with_early_stopping(
    _, final_fit, super_search, tmp_path
):
    """The parent search receives an EarlyStopping callback."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    tuner.search(x=None, epochs=10, validation_data=None)

    has_early_stopping = called_with_early_stopping(super_search)
    assert has_early_stopping
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner.get_best_models",
    return_value=[mock.Mock()],
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
@mock.patch("autokeras.pipeline.load_pipeline")
@mock.patch("keras_tuner.Oracle.get_best_trials", return_value=[mock.Mock()])
def test_no_final_fit_without_epochs_and_fov(
    _, _1, _2, get_best_models, final_fit, super_search, tmp_path
):
    """No final fit happens when epochs is None and no validation split is set."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)

    tuner.search(x=None, epochs=None, validation_data=None)

    final_fit.assert_not_called()
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner._get_best_trial_epochs", return_value=2
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_final_fit_best_epochs_if_epoch_unspecified(
    _, best_epochs, final_fit, super_search, tmp_path
):
    """Without epochs, final_fit() gets the best trial's epoch count."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    x_train = np.random.rand(100, 32, 32, 3)
    y_train = np.random.rand(100, 1)
    validation = (np.random.rand(20, 32, 32, 3), np.random.rand(20, 1))
    tuner.search(
        x=x_train,
        y=y_train,
        epochs=None,
        validation_split=0.2,
        validation_data=validation,
    )

    final_fit_kwargs = final_fit.call_args_list[0][1]
    assert final_fit_kwargs["epochs"] == 2
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner._get_best_trial_epochs", return_value=2
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_super_with_1k_epochs_if_epoch_unspecified(
    _, best_epochs, final_fit, super_search, tmp_path
):
    """Without epochs, the parent search runs with 1000 epochs and EarlyStopping."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    # TODO: try to use x, and y instead of tuple input across the lib.
    x_train = np.random.rand(100, 32, 32, 3)
    y_train = np.random.rand(100, 1)
    validation = (np.random.rand(20, 32, 32, 3), np.random.rand(20, 1))
    tuner.search(
        x=x_train,
        y=y_train,
        epochs=None,
        validation_split=0.2,
        validation_data=validation,
    )

    super_search_kwargs = super_search.call_args_list[0][1]
    assert super_search_kwargs["epochs"] == 1000
    assert called_with_early_stopping(super_search)
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_tuner_not_call_super_search_with_overwrite(
    _, final_fit, super_search, tmp_path
):
    """A second tuner in the same directory does not run the parent search again."""
    search_kwargs = dict(x=None, epochs=10, validation_data=None)

    first_tuner = greedy.Greedy(
        hypermodel=test_utils.build_graph(), directory=tmp_path
    )
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())
    first_tuner.search(**search_kwargs)
    first_tuner.save()
    super_search.reset_mock()

    second_tuner = greedy.Greedy(
        hypermodel=test_utils.build_graph(), directory=tmp_path
    )
    second_tuner.search(**search_kwargs)

    super_search.assert_not_called()
def test_tuner_does_not_crash_with_distribution_strategy(tmp_path):
    """Building the hypermodel with the oracle's hyperparameters must not raise."""
    graph = test_utils.build_graph()
    greedy_tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    hyperparameters = greedy_tuner.oracle.hyperparameters
    greedy_tuner.hypermodel.build(hyperparameters)
def test_adapt_with_model_with_preprocessing_layer_only():
    """Greedy.adapt runs on a model whose only layer is a Normalization layer."""
    model_input = keras.Input(shape=(10,))
    normalized = keras.layers.Normalization()(model_input)
    preprocessing_only_model = keras.Model(model_input, normalized)

    dataset = (np.random.rand(100, 10), np.random.rand(100, 10))
    greedy.Greedy.adapt(preprocessing_only_model, dataset)
def test_build_block_in_blocks_with_same_name(tmp_path):
    """All trials hold as many hyperparameter values as the first trial.

    The model uses a block that builds another block inside its own build().
    """

    class Block1(ak.Block):
        def build(self, hp, inputs):
            hp.Boolean("a")
            dense = keras.layers.Dense(3)
            return dense(tree.flatten(inputs)[0])

    class Block2(ak.Block):
        def build(self, hp, inputs):
            hp.Boolean("b")
            inner_block = Block1()
            return inner_block.build(hp, inputs)

    input_node = ak.Input()
    hidden_node = Block2()(input_node)
    head_node = ak.RegressionHead()(hidden_node)

    auto_model = ak.AutoModel(
        input_node, head_node, max_trials=5, directory=tmp_path
    )
    auto_model.fit(np.random.rand(100, 5), np.random.rand(100, 1), epochs=1)

    all_trials = list(auto_model.tuner.oracle.trials.values())
    # trials[0] is only evaluated lazily, so an empty trial list passes
    # vacuously just like a loop over it would.
    assert all(
        len(trial.hyperparameters.values)
        == len(all_trials[0].hyperparameters.values)
        for trial in all_trials
    )
================================================
FILE: autokeras/graph.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import keras_tuner
import tree
from autokeras import blocks as blocks_module
from autokeras import nodes as nodes_module
from autokeras.engine import head as head_module
from autokeras.engine import serializable
from autokeras.utils import io_utils
def feature_encoding_input(block):
    """Copy column metadata from the block's first input node onto the block.

    Sets `block.column_types` and `block.column_names` from the same-named
    attributes of `block.inputs[0]` (a StructuredDataInput, per the original
    docstring).
    """
    first_input = block.inputs[0]
    block.column_types = first_input.column_types
    block.column_names = first_input.column_names
# Functions that Graph.compile() runs on each block, keyed by block class.
# The lookup uses block.__class__, so only an exact class match applies
# (subclasses of a listed class are not matched).
COMPILE_FUNCTIONS = {
    blocks_module.StructuredDataBlock: [feature_encoding_input],
}
def load_graph(filepath, custom_objects=None):
    """Load a Graph from the JSON config stored at `filepath`.

    # Arguments
        filepath: Path of the JSON file to read with `io_utils.load_json`.
        custom_objects: Optional dict made available through
            `keras.utils.custom_object_scope` while deserializing.
            Defaults to an empty dict.

    # Returns
        The Graph built by `Graph.from_config`.
    """
    scope_objects = {} if custom_objects is None else custom_objects
    with keras.utils.custom_object_scope(scope_objects):
        config = io_utils.load_json(filepath)
        return Graph.from_config(config)
class Graph(keras_tuner.HyperModel, serializable.Serializable):
    """A graph consists of connected Blocks, or Heads.

    # Arguments
        inputs: A list of input node(s) for the Graph.
        outputs: A list of output node(s) for the Graph.
    """

    def __init__(self, inputs=None, outputs=None, **kwargs):
        super().__init__(**kwargs)
        self.inputs = tree.flatten(inputs)
        self.outputs = tree.flatten(outputs)
        # Node -> integer id, in the order nodes are registered by _add_node.
        self._node_to_id = {}
        # Nodes sorted by their id.
        self._nodes = []
        # Blocks in topological order, and block -> index in that list.
        self.blocks = []
        self._block_to_id = {}
        if inputs and outputs:
            self._build_network()

        # Temporary attributes, assigned by set_fit_args().
        self.epochs = None
        self.num_samples = None

    def _build_network(self):
        """Discover the nodes and blocks between inputs and outputs.

        Populates `_node_to_id`, `_nodes`, `blocks` and `_block_to_id`, with
        `blocks` stored in topological order.

        # Raises
            ValueError: If an input or output node was not registered while
                searching, or a selected block has an input node that is
                not part of the graph. `_search_network` raises ValueError
                on a cycle.
        """
        self._node_to_id = {}

        # Recursively find all the interested nodes.
        for input_node in self.inputs:
            self._search_network(input_node, self.outputs, set(), set())
        self._nodes = sorted(self._node_to_id, key=self._node_to_id.get)

        for node in self.inputs + self.outputs:
            if node not in self._node_to_id:
                raise ValueError("Inputs and outputs not connected.")

        # Find the blocks: keep a block once if any of its outputs is a
        # registered node.
        blocks = []
        for input_node in self._nodes:
            for block in input_node.out_blocks:
                reaches_graph = any(
                    output_node in self._node_to_id
                    for output_node in block.outputs
                )
                if reaches_graph and block not in blocks:
                    blocks.append(block)

        # Check if all the inputs of the blocks are set as inputs.
        for block in blocks:
            for input_node in block.inputs:
                if input_node not in self._node_to_id:
                    raise ValueError(
                        "A required input is missing for HyperModel "
                        "{name}.".format(name=block.name)
                    )

        # Calculate the in degree of all the nodes: the number of selected
        # blocks that produce each node.
        in_degree = [
            sum(1 for block in node.in_blocks if block in blocks)
            for node in self._nodes
        ]

        # Add the blocks in topological order.
        self.blocks = []
        self._block_to_id = {}
        while len(blocks) != 0:
            # Collect blocks whose input nodes all have in degree 0.
            new_added = [
                block
                for block in blocks
                if not any(
                    in_degree[self._node_to_id[node]] for node in block.inputs
                )
            ]

            # Remove the collected blocks from blocks.
            for block in new_added:
                blocks.remove(block)

            for block in new_added:
                # Add the collected blocks to the Graph.
                self._add_block(block)

                # Decrease the in degree of the output nodes.
                for output_node in block.outputs:
                    output_node_id = self._node_to_id[output_node]
                    in_degree[output_node_id] -= 1

    def _search_network(
        self, input_node, outputs, in_stack_nodes, visited_nodes
    ):
        """Depth-first search that registers nodes leading to `outputs`.

        A node is registered through `_add_node` when it is itself one of
        `outputs` or when one of its downstream nodes has been registered.

        # Arguments
            input_node: The node to expand.
            outputs: The output nodes being searched for.
            in_stack_nodes: Set of nodes on the current recursion path.
            visited_nodes: Set of nodes already expanded in this search.

        # Raises
            ValueError: If a downstream node is already on the recursion
                path, i.e. the network has a cycle.
        """
        visited_nodes.add(input_node)
        in_stack_nodes.add(input_node)

        outputs_reached = input_node in outputs

        for block in input_node.out_blocks:
            for output_node in block.outputs:
                if output_node in in_stack_nodes:
                    raise ValueError("The network has a cycle.")
                if output_node not in visited_nodes:
                    self._search_network(
                        output_node, outputs, in_stack_nodes, visited_nodes
                    )
                if output_node in self._node_to_id:
                    outputs_reached = True

        if outputs_reached:
            self._add_node(input_node)

        in_stack_nodes.remove(input_node)

    def _add_block(self, block):
        """Append `block` to `self.blocks` once and record its index."""
        if block not in self.blocks:
            block_id = len(self.blocks)
            self._block_to_id[block] = block_id
            self.blocks.append(block)

    def _add_node(self, input_node):
        """Assign the next free id to `input_node` if it has none yet."""
        if input_node not in self._node_to_id:
            self._node_to_id[input_node] = len(self._node_to_id)

    def get_config(self):
        """Return a dict describing the graph.

        Only the input nodes are serialized; `from_config` recreates the
        remaining nodes by calling the blocks again.
        """
        blocks = [blocks_module.serialize(block) for block in self.blocks]
        nodes = {
            str(self._node_to_id[node]): nodes_module.serialize(node)
            for node in self.inputs
        }
        block_inputs = {
            str(block_id): [self._node_to_id[node] for node in block.inputs]
            for block_id, block in enumerate(self.blocks)
        }
        block_outputs = {
            str(block_id): [self._node_to_id[node] for node in block.outputs]
            for block_id, block in enumerate(self.blocks)
        }

        outputs = [self._node_to_id[node] for node in self.outputs]

        return {
            "blocks": blocks,  # List of serialized blocks; index = block id.
            "nodes": nodes,  # Dict {node_id: serialized}, input nodes only.
            "outputs": outputs,  # List of node_ids.
            "block_inputs": block_inputs,  # Dict {id: List of node_ids}.
            "block_outputs": block_outputs,  # Dict {id: List of node_ids}.
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a Graph from a dict produced by `get_config`."""
        blocks = [
            blocks_module.deserialize(block) for block in config["blocks"]
        ]
        nodes = {
            int(node_id): nodes_module.deserialize(node)
            for node_id, node in config["nodes"].items()
        }

        # At this point `nodes` holds exactly the serialized input nodes.
        inputs = [nodes[node_id] for node_id in nodes]
        for block_id, block in enumerate(blocks):
            input_nodes = [
                nodes[node_id]
                for node_id in config["block_inputs"][str(block_id)]
            ]
            # Calling the block creates its output nodes; store them under
            # the ids recorded in the config.
            output_nodes = tree.flatten(block(input_nodes))
            for output_node, node_id in zip(
                output_nodes, config["block_outputs"][str(block_id)]
            ):
                nodes[node_id] = output_node

        outputs = [nodes[node_id] for node_id in config["outputs"]]
        return cls(inputs=inputs, outputs=outputs)

    def compile(self):
        """Share the information between blocks."""
        for block in self.blocks:
            for func in COMPILE_FUNCTIONS.get(block.__class__, []):
                func(block)

    def build(self, hp):
        """Build the HyperModel into a Keras Model."""
        self.compile()
        # Graph node id -> Keras tensor produced for that node.
        keras_nodes = {}
        keras_input_nodes = []
        for node in self.inputs:
            node_id = self._node_to_id[node]
            input_node = node.build_node(hp)
            output_node = node.build(hp, input_node)
            keras_input_nodes.append(input_node)
            keras_nodes[node_id] = output_node
        # self.blocks is in topological order, so every block's inputs are
        # already present in keras_nodes when it is built.
        for block in self.blocks:
            temp_inputs = [
                keras_nodes[self._node_to_id[input_node]]
                for input_node in block.inputs
            ]
            outputs = block.build(hp, inputs=temp_inputs)
            outputs = tree.flatten(outputs)
            for output_node, real_output_node in zip(block.outputs, outputs):
                keras_nodes[self._node_to_id[output_node]] = real_output_node
        model = keras.Model(
            keras_input_nodes,
            [
                keras_nodes[self._node_to_id[output_node]]
                for output_node in self.outputs
            ],
        )

        return self._compile_keras_model(hp, model)

    def _get_metrics(self):
        """Return {head name: metrics} for outputs produced by a Head."""
        metrics = {}
        for output_node in self.outputs:
            block = output_node.in_blocks[0]
            if isinstance(block, head_module.Head):
                metrics[block.name] = block.metrics
        return metrics

    def _get_loss(self):
        """Return {head name: loss} for outputs produced by a Head."""
        loss = {}
        for output_node in self.outputs:
            block = output_node.in_blocks[0]
            if isinstance(block, head_module.Head):
                loss[block.name] = block.loss
        return loss

    def _compile_keras_model(self, hp, model):
        """Choose optimizer and learning rate from `hp` and compile `model`.

        # Raises
            ValueError: If the "optimizer" hyperparameter holds a name that
                is not handled here.
        """
        # Specify hyperparameters from compile(...)
        optimizer_name = hp.Choice(
            "optimizer",
            ["adam", "sgd", "adam_weight_decay"],
            default="adam",
        )
        # TODO: add adadelta optimizer when it can optimize embedding layer on
        # GPU.
        learning_rate = hp.Choice(
            "learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 2e-5, 1e-5], default=1e-3
        )

        if optimizer_name == "adam":
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == "sgd":
            optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_name == "adam_weight_decay":
            # Uses self.num_samples and self.epochs, which set_fit_args()
            # assigns; batch size falls back to 32 when unset.
            steps_per_epoch = max(
                1, int(self.num_samples / (self.batch_size or 32))
            )
            num_train_steps = steps_per_epoch * self.epochs

            lr_schedule = keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=num_train_steps,
                end_learning_rate=0.0,
            )

            optimizer = keras.optimizers.AdamW(
                learning_rate=lr_schedule,
                weight_decay=0.01,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-6,
            )
        else:
            # Without this branch `optimizer` would be unbound below.
            raise ValueError(
                "Unsupported optimizer: {name}.".format(name=optimizer_name)
            )

        model.compile(
            optimizer=optimizer,
            metrics=self._get_metrics(),
            loss=self._get_loss(),
        )

        return model

    def save(self, filepath):
        """Write `get_config()` to `filepath` as JSON."""
        io_utils.save_json(filepath, self.get_config())

    def set_io_shapes(self, shapes):
        """Set node and head shapes from `shapes`, dropping the first axis.

        `shapes[0]` is matched against the input nodes and `shapes[1]`
        against the blocks producing the output nodes.
        """
        for node, shape in zip(self.inputs, tree.flatten(shapes[0])):
            node.shape = tuple(shape[1:])
        for node, shape in zip(self.outputs, tree.flatten(shapes[1])):
            node.in_blocks[0].shape = tuple(shape[1:])

    def set_fit_args(self, validation_split, epochs=None):
        """Record the epochs and training sample count used by build().

        # Arguments
            validation_split: Fraction held out for validation; a falsy
                value (None or 0) is treated as 0.
            epochs: Number of epochs; None is stored as 1.
        """
        self.epochs = epochs
        # Epochs not specified by the user
        if self.epochs is None:
            self.epochs = 1
        validation_split = validation_split or 0
        # num_samples from analysers are before split
        self.num_samples = self.inputs[0].num_samples * (1 - validation_split)

    @property
    def batch_size(self):
        """Batch size stored on the first input node."""
        return self.inputs[0].batch_size
================================================
FILE: autokeras/graph_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import keras_tuner
import pytest
import autokeras as ak
from autokeras import graph as graph_module
def test_input_output_disconnect():
    """Graph rejects an input that has no path to the requested output."""
    # First chain: an input feeding a block whose output is discarded.
    dangling_input = ak.Input()
    ak.DenseBlock()(dangling_input)

    # Second, unrelated chain ending in a regression head.
    connected_input = ak.Input()
    hidden_node = ak.DenseBlock()(connected_input)
    head_node = ak.RegressionHead()(hidden_node)

    with pytest.raises(ValueError) as info:
        graph_module.Graph(inputs=dangling_input, outputs=head_node)
    assert "Inputs and outputs not connected." in str(info.value)
def test_hyper_graph_cycle():
    """Graph raises when a head's outputs point back to an upstream node."""
    first_input = ak.Input()
    second_input = ak.Input()
    first_branch = ak.DenseBlock()(first_input)
    second_branch = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_branch, second_branch])
    head = ak.RegressionHead()
    head_output = head(merged)
    # Rewire the head's outputs to an earlier node to create the cycle.
    head.outputs = first_branch

    with pytest.raises(ValueError) as info:
        graph_module.Graph(
            inputs=[first_input, second_input], outputs=head_output
        )
    assert "The network has a cycle." in str(info.value)
def test_input_missing():
    """Graph raises when only one of two merged inputs is declared."""
    first_input = ak.Input()
    second_input = ak.Input()
    first_branch = ak.DenseBlock()(first_input)
    second_branch = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_branch, second_branch])
    head_output = ak.RegressionHead()(merged)

    with pytest.raises(ValueError) as info:
        graph_module.Graph(inputs=first_input, outputs=head_output)
    assert "A required input is missing for HyperModel" in str(info.value)
def test_graph_basics():
    """A dense regression graph builds a model with the declared shapes."""
    input_node = ak.Input(shape=(30,))
    hidden_node = ak.DenseBlock()(input_node)
    head_node = ak.RegressionHead(shape=(1,))(hidden_node)

    graph = graph_module.Graph(inputs=input_node, outputs=head_node)
    model = graph.build(keras_tuner.HyperParameters())

    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, 1)
def test_adamw_optimizer():
    """The graph builds when the optimizer is forced to adam_weight_decay."""
    input_node = ak.Input(shape=(30,))
    hidden_node = ak.DenseBlock()(input_node)
    head_node = ak.RegressionHead(shape=(1,))(hidden_node)

    # Register the choice, then override its value with the AdamW option.
    hp = keras_tuner.HyperParameters()
    hp.Choice("optimizer", ["adam", "sgd", "adam_weight_decay"], default="adam")
    hp.values["optimizer"] = "adam_weight_decay"

    graph = graph_module.Graph(inputs=input_node, outputs=head_node)
    first_input = graph.inputs[0]
    first_input.num_samples = 100
    first_input.batch_size = 32
    graph.epochs = 10
    graph.set_fit_args(0, epochs=10)

    model = graph.build(hp)

    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, 1)
def test_graph_save_load(tmp_path):
    """A two-input, two-head graph survives a save/load round trip."""
    dense_input = ak.Input()
    conv_input = ak.Input()
    dense_branch = ak.DenseBlock()(dense_input)
    conv_branch = ak.ConvBlock()(conv_input)
    merged = ak.Merge()([dense_branch, conv_branch])
    regression_output = ak.RegressionHead()(merged)
    classification_output = ak.ClassificationHead()(merged)

    original = graph_module.Graph(
        inputs=[dense_input, conv_input],
        outputs=[regression_output, classification_output],
    )
    graph_path = os.path.join(tmp_path, "graph")
    original.save(graph_path)
    restored = graph_module.load_graph(graph_path)

    assert len(restored.inputs) == 2
    assert len(restored.outputs) == 2
    assert isinstance(restored.inputs[0].out_blocks[0], ak.DenseBlock)
    assert isinstance(restored.inputs[1].out_blocks[0], ak.ConvBlock)
def test_merge():
    """Two differently shaped inputs can be merged into one regression head."""
    first_input = ak.Input(shape=(30,))
    second_input = ak.Input(shape=(40,))
    first_branch = ak.DenseBlock()(first_input)
    second_branch = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_branch, second_branch])
    head_node = ak.RegressionHead(shape=(1,))(merged)

    graph = graph_module.Graph(
        inputs=[first_input, second_input], outputs=head_node
    )
    model = graph.build(keras_tuner.HyperParameters())

    assert model.input_shape == [(None, 30), (None, 40)]
    assert model.output_shape == (None, 1)
def test_save_custom_metrics_loss(tmp_path):
    """Custom loss and metric callables are restored via custom_objects."""

    def custom_metric(y_pred, y_true):
        return 1

    def custom_loss(y_pred, y_true):
        return y_pred - y_true

    head = ak.ClassificationHead(
        loss=custom_loss, metrics=["accuracy", custom_metric]
    )
    input_node = ak.Input()
    head_node = head(input_node)
    graph = graph_module.Graph(input_node, head_node)

    graph_path = os.path.join(tmp_path, "graph")
    graph.save(graph_path)
    custom_objects = {
        "custom_metric": custom_metric,
        "custom_loss": custom_loss,
    }
    restored = graph_module.load_graph(
        graph_path, custom_objects=custom_objects
    )

    restored_head = restored.blocks[0]
    assert restored_head.metrics[1](0, 0) == 1
    assert restored_head.loss(3, 2) == 1
def test_graph_can_init_with_one_missing_output():
    """Graph construction tolerates an extra head hanging off the output node."""
    image_input = ak.ImageInput()
    conv_node = ak.ConvBlock()(image_input)
    regression_node = ak.RegressionHead()(conv_node)
    # Attach a second head to the output node; its result is not an output.
    ak.ClassificationHead()(regression_node)

    graph_module.Graph(image_input, regression_node)
def test_set_fit_args_with_none_validation_split():
    """set_fit_args treats validation_split=None as 0 (no samples held out)."""
    input_node = ak.Input(shape=(30,))
    hidden_node = ak.DenseBlock()(input_node)
    head_node = ak.RegressionHead(shape=(1,))(hidden_node)
    graph = graph_module.Graph(inputs=input_node, outputs=head_node)

    first_input = graph.inputs[0]
    first_input.num_samples = 100
    first_input.batch_size = 32
    graph.set_fit_args(None, epochs=1)

    assert graph.num_samples == 100  # Should handle None as 0
================================================
FILE: autokeras/hyper_preprocessors.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
from autokeras import preprocessors
from autokeras.engine import hyper_preprocessor
from autokeras.utils import utils
def serialize(encoder):
    """Serialize `encoder` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(encoder)
    return serialized
def deserialize(config, custom_objects=None):
    """Deserialize `config` with `utils.deserialize_keras_object`.

    This module's globals are passed as `module_objects`, and
    `custom_objects` is forwarded unchanged.
    """
    module_namespace = globals()
    return utils.deserialize_keras_object(
        config,
        module_objects=module_namespace,
        custom_objects=custom_objects,
    )
@keras.utils.register_keras_serializable(package="autokeras")
class DefaultHyperPreprocessor(hyper_preprocessor.HyperPreprocessor):
    """HyperPreprocessor without Hyperparameters to tune.

    It would always return the same preprocessor. No hyperparameters to be
    tuned.

    # Arguments
        preprocessor: The Preprocessor to return when calling build.
    """

    def __init__(self, preprocessor, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.preprocessor = preprocessor

    def build(self, hp, dataset):
        """Return the wrapped preprocessor; `hp` and `dataset` are unused."""
        return self.preprocessor

    def get_config(self):
        """Return the parent config plus the serialized preprocessor."""
        config = super().get_config()
        config.update(
            {"preprocessor": preprocessors.serialize(self.preprocessor)}
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create an instance from a dict produced by `get_config`.

        The caller's dict is left untouched, so the same serialized config
        can be deserialized more than once.
        """
        # Shallow copy: only the top-level "preprocessor" entry is replaced.
        config = dict(config)
        config["preprocessor"] = preprocessors.deserialize(
            config["preprocessor"]
        )
        return super().from_config(config)
================================================
FILE: autokeras/hyper_preprocessors_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import hyper_preprocessors
from autokeras import preprocessors
def test_serialize_and_deserialize_default_hpps():
    """A DefaultHyperPreprocessor keeps its preprocessor type after a round trip."""
    wrapped = preprocessors.AddOneDimension()
    original = hyper_preprocessors.DefaultHyperPreprocessor(wrapped)

    serialized = hyper_preprocessors.serialize(original)
    restored = hyper_preprocessors.deserialize(serialized)

    assert isinstance(restored.preprocessor, preprocessors.AddOneDimension)
def test_serialize_and_deserialize_default_hpps_categorical():
    """A fitted CategoricalToNumerical still transforms after a round trip."""
    x_train = np.array([["a", "ab", 2.1], ["b", "bc", 1.0], ["a", "bc", "nan"]])
    column_types = {
        "column_a": "categorical",
        "column_b": "categorical",
        "column_c": "numerical",
    }
    categorical = preprocessors.CategoricalToNumerical(
        column_names=["column_a", "column_b", "column_c"],
        column_types=column_types,
    )

    original = hyper_preprocessors.DefaultHyperPreprocessor(categorical)
    original.preprocessor.fit(x_train)
    serialized = hyper_preprocessors.serialize(original)
    restored = hyper_preprocessors.deserialize(serialized)

    assert isinstance(
        restored.preprocessor,
        preprocessors.CategoricalToNumerical,
    )

    results = restored.preprocessor.transform(x_train)

    # Column 0: rows 0 and 2 share "a"; row 1 is "b".
    assert results[0][0] == results[2][0]
    assert results[0][0] != results[1][0]
    # Column 1: row 0 is "ab"; rows 1 and 2 are "bc".
    assert results[0][1] != results[1][1]
    assert results[0][1] != results[2][1]
    # The "nan" entry in the numerical column comes out as 0.
    assert results[2][2] == 0
    assert results.dtype == "float32"
================================================
FILE: autokeras/integration_tests/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/integration_tests/functional_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd
import autokeras as ak
from autokeras import test_utils
def test_text_and_structured_data(tmp_path):
    """Fit a two-input, two-head AutoModel on text plus structured data."""
    # Prepare the data.
    num_instances = 3
    x_text = test_utils.generate_text_data(num_instances)
    csv_frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    x_structured_data = csv_frame.values[:num_instances]
    y_classification = test_utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    y_regression = test_utils.generate_data(
        num_instances=num_instances, shape=(1,)
    )

    # Build model and train.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.DenseBlock()(structured_data_input)
    text_input = ak.TextInput()
    text_output = ak.TextBlock()(text_input)
    merged_outputs = ak.Merge()((structured_data_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)

    model_inputs = [text_input, structured_data_input]
    model_outputs = [regression_outputs, classification_outputs]
    automodel = ak.AutoModel(
        inputs=model_inputs,
        directory=tmp_path,
        outputs=model_outputs,
        max_trials=2,
        tuner=ak.Hyperband,
        seed=test_utils.SEED,
    )

    features = (x_text, x_structured_data)
    targets = (y_regression, y_classification)
    automodel.fit(
        features,
        targets,
        validation_split=0.2,
        epochs=1,
        batch_size=2,
    )
def test_image_blocks(tmp_path):
    """Fit an image model that merges a ResNet and an Xception branch."""
    num_instances = 3
    x_train = test_utils.generate_data(
        num_instances=num_instances, shape=(28, 28)
    )
    y_train = np.random.randint(0, 10, num_instances)

    # Shared preprocessing followed by two parallel branches.
    input_node = ak.ImageInput()
    normalized = ak.Normalization()(input_node)
    augmented = ak.ImageAugmentation()(normalized)
    resnet_branch = ak.ResNetBlock(version="v2")(augmented)
    xception_branch = ak.XceptionBlock()(augmented)
    merged = ak.Merge()((resnet_branch, xception_branch))
    head_node = ak.ClassificationHead()(merged)

    automodel = ak.AutoModel(
        inputs=input_node,
        outputs=head_node,
        directory=tmp_path,
        max_trials=1,
        seed=test_utils.SEED,
    )

    # The training data doubles as the validation data.
    automodel.fit(
        x_train,
        y_train,
        validation_data=(x_train, y_train),
        epochs=1,
        batch_size=2,
    )
================================================
FILE: autokeras/integration_tests/io_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd
import autokeras as ak
from autokeras import test_utils
def test_io_api(tmp_path):
    """Fit and predict with an AutoModel declared only by inputs and heads."""
    num_instances = 3
    image_x = test_utils.generate_data(
        num_instances=num_instances, shape=(28, 28)
    )
    text_x = test_utils.generate_text_data(num_instances=num_instances)
    image_x = image_x[:num_instances]

    csv_values = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy()
    structured_data_x = csv_values.astype(str)[:num_instances]

    classification_y = test_utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    regression_y = test_utils.generate_data(
        num_instances=num_instances, shape=(1,)
    )

    # Build model and train.
    input_nodes = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    output_heads = [
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ]
    automodel = ak.AutoModel(
        inputs=input_nodes,
        outputs=output_heads,
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=test_utils.SEED,
    )

    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=2,
    )
    automodel.predict([image_x, text_x, structured_data_x])
================================================
FILE: autokeras/integration_tests/task_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import numpy as np
import pandas as pd
import autokeras as ak
from autokeras import test_utils
# Number of samples the tests in this module generate or train on.
NUM_INSTANCES = 3
# Batch size passed to every fit() call in this module.
BATCH_SIZE = 2
def test_image_classifier(tmp_path):
    """ImageClassifier fits, exports, evaluates and predicts 10-class output."""
    train_x = test_utils.generate_data(
        num_instances=NUM_INSTANCES, shape=(32, 32)
    )
    train_y = test_utils.generate_one_hot_labels(
        num_instances=NUM_INSTANCES, num_classes=10
    )

    classifier = ak.ImageClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
    )
    classifier.fit(
        train_x,
        train_y,
        epochs=1,
        validation_split=0.2,
        batch_size=BATCH_SIZE,
    )
    exported_model = classifier.export_model()
    classifier.evaluate(train_x, train_y)

    predictions = classifier.predict(train_x)
    assert predictions.shape == (len(train_x), 10)
    assert isinstance(exported_model, keras.Model)
def test_image_regressor(tmp_path):
    """ImageRegressor fits, exports and predicts one value per sample."""
    train_x = test_utils.generate_data(
        num_instances=NUM_INSTANCES, shape=(32, 32, 3)
    )
    train_y = test_utils.generate_data(num_instances=NUM_INSTANCES, shape=(1,))

    regressor = ak.ImageRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    regressor.fit(
        train_x,
        train_y,
        epochs=1,
        validation_split=0.2,
        batch_size=BATCH_SIZE,
    )
    regressor.export_model()

    predictions = regressor.predict(train_x)
    assert predictions.shape == (len(train_x), 1)
def test_text_classifier(tmp_path):
    """TextClassifier fits with an accuracy objective and predicts one column."""
    train_x = test_utils.generate_text_data(num_instances=NUM_INSTANCES)
    # Alternating 0/1 labels, truncated to NUM_INSTANCES entries.
    train_y = np.tile([0, 1], (NUM_INSTANCES + 1) // 2)[:NUM_INSTANCES]
    # The training data is reused for validation and prediction.
    test_x, test_y = train_x, train_y

    classifier = ak.TextClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
        metrics=["accuracy"],
        objective="accuracy",
    )
    classifier.fit(
        train_x,
        train_y,
        epochs=1,
        validation_data=(test_x, test_y),
        batch_size=BATCH_SIZE,
    )
    classifier.export_model()

    predictions = classifier.predict(test_x)
    assert predictions.shape == (len(test_x), 1)
    assert classifier.tuner._get_best_trial_epochs() <= 2
def test_text_regressor(tmp_path):
    """TextRegressor fits, predicts, exports and predicts one value per sample."""
    train_x = test_utils.generate_text_data(num_instances=NUM_INSTANCES)
    test_x = train_x
    train_y = test_utils.generate_data(num_instances=NUM_INSTANCES, shape=(1,))
    test_y = train_y

    regressor = ak.TextRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    regressor.fit(
        train_x,
        train_y,
        epochs=1,
        validation_data=(test_x, test_y),
        batch_size=BATCH_SIZE,
    )
    # predict() is called once before and once after export_model().
    regressor.predict(test_x)
    regressor.export_model()

    predictions = regressor.predict(test_x)
    assert predictions.shape == (len(test_x), 1)
def test_structured_data_regressor(tmp_path):
    """StructuredDataRegressor predicts one value per held-out row."""
    num_train = NUM_INSTANCES
    num_data = NUM_INSTANCES * 2

    csv_values = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy()
    data = csv_values.astype(str)[:num_data]
    x_train = data[:num_train]
    x_test = data[num_train:]

    y = test_utils.generate_data(num_instances=num_data, shape=())
    y_train = y[:num_train]
    y_test = y[num_train:]

    regressor = ak.StructuredDataRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    # The training split doubles as the validation data.
    regressor.fit(
        x_train,
        y_train,
        epochs=11,
        validation_data=(x_train, y_train),
        batch_size=BATCH_SIZE,
    )
    regressor.export_model()

    predictions = regressor.predict(x_test)
    assert predictions.shape == (len(y_test), 1)
def test_structured_data_classifier(tmp_path):
    """StructuredDataClassifier predicts three columns per held-out row."""
    num_train = NUM_INSTANCES
    num_data = NUM_INSTANCES * 2

    csv_values = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy()
    data = csv_values.astype(str)[:num_data]
    x_train = data[:num_train]
    x_test = data[num_train:]

    y = test_utils.generate_one_hot_labels(
        num_instances=num_data, num_classes=3
    )
    y_train = y[:num_train]
    y_test = y[num_train:]

    classifier = ak.StructuredDataClassifier(
        directory=tmp_path, max_trials=1, seed=test_utils.SEED
    )
    # The training split doubles as the validation data.
    classifier.fit(
        x_train,
        y_train,
        epochs=2,
        validation_data=(x_train, y_train),
        batch_size=BATCH_SIZE,
    )
    classifier.export_model()

    predictions = classifier.predict(x_test)
    assert predictions.shape == (len(y_test), 3)
================================================
FILE: autokeras/keras_layers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
from keras import layers
from keras import ops
from autokeras.utils import data_utils
# String constants; not referenced by the layers defined in this module.
# NOTE(review): presumably encoding-mode identifiers used elsewhere — confirm.
INT = "int"
NONE = "none"
ONE_HOT = "one-hot"
class PreprocessingLayer(layers.Layer):
    """Common base class for the preprocessing layers in this module."""
@keras.utils.register_keras_serializable()
class CastToFloat32(PreprocessingLayer):
    """Layer that passes its inputs through `data_utils.cast_to_float32`."""

    def get_config(self):
        base_config = super().get_config()
        return base_config

    def call(self, inputs):
        # Does not and needs not handle strings.
        casted = data_utils.cast_to_float32(inputs)
        return casted

    def adapt(self, data):
        # Nothing to adapt for this layer.
        return None
@keras.utils.register_keras_serializable()
class ExpandLastDim(PreprocessingLayer):
    """Layer that appends a trailing axis to its inputs."""

    def get_config(self):
        base_config = super().get_config()
        return base_config

    def call(self, inputs):
        expanded = ops.expand_dims(inputs, axis=-1)
        return expanded

    def adapt(self, data):
        # Nothing to adapt for this layer.
        return None
================================================
FILE: autokeras/keras_layers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import keras_layers as layer_module
def get_text_data():
    """Build a small text fixture.

    # Returns
        A tuple `(train, test, y)`: `train` is a (5, 1) string array, `test`
        is a (3, 1) string array holding the first three training sentences,
        and `y` is a (3, 1) array of random floats.
    """
    shared_sentences = [
        "This is a test example",
        "This is another text example",
        "Is this another example?",
    ]
    train_only_sentences = [
        "",
        "Is this a long long long long long long example?",
    ]
    train = np.array(
        [[sentence] for sentence in shared_sentences + train_only_sentences],
        dtype=str,
    )
    test = np.array([[sentence] for sentence in shared_sentences], dtype=str)
    y = np.random.rand(3, 1)
    return train, test, y
def test_cast_to_float32_return_float32_tensor(tmp_path):
    # A uint8 input must come out of the layer as float32.
    uint8_input = np.array([3], dtype="uint8")
    layer = layer_module.CastToFloat32()

    tensor = layer(uint8_input)

    assert "float32" == tensor.numpy().dtype
def test_expand_last_dim_return_tensor_with_more_dims(tmp_path):
    # A rank-1 input must gain exactly one axis.
    rank_one_input = np.array([0.1, 0.2], dtype="float32")
    layer = layer_module.ExpandLastDim()

    tensor = layer(rank_one_input)

    assert 2 == len(tensor.shape)
================================================
FILE: autokeras/nodes.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict
from typing import List
from typing import Optional
import keras
import tree
from autokeras import adapters
from autokeras import analysers
from autokeras import blocks
from autokeras import hyper_preprocessors
from autokeras import keras_layers
from autokeras import preprocessors
from autokeras.engine import io_hypermodel
from autokeras.engine import node as node_module
from autokeras.utils import utils
def serialize(obj):
    """Serialize `obj` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized
def deserialize(config, custom_objects=None):
    """Rebuild an object from `config`.

    Names are looked up among this module's globals and the optional
    `custom_objects`.
    """
    module_namespace = globals()
    return utils.deserialize_keras_object(
        config,
        module_objects=module_namespace,
        custom_objects=custom_objects,
    )
@keras.utils.register_keras_serializable(package="autokeras")
class Input(node_module.Node, io_hypermodel.IOHyperModel):
    """Input node for tensor data.

    The data should be numpy.ndarray.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create the `keras.Input` for this node's shape and dtype."""
        node_shape = self.shape
        node_dtype = self.dtype
        return keras.Input(shape=node_shape, dtype=node_dtype)

    def build(self, hp, inputs=None):
        """Cast the first flattened input with `CastToFloat32`."""
        first_input = tree.flatten(inputs)[0]
        cast_layer = keras_layers.CastToFloat32()
        return cast_layer(first_input)

    def get_adapter(self):
        adapter = adapters.InputAdapter()
        return adapter

    def get_analyser(self):
        analyser = analysers.InputAnalyser()
        return analyser

    def get_block(self):
        block = blocks.GeneralBlock()
        return block

    def get_hyper_preprocessors(self):
        # The generic input node uses no hyper preprocessors.
        return list()
class ImageInput(Input):
    """Input node for image data.

    The input data should be numpy.ndarray. The shape of the data should be
    (samples, width, height) or (samples, width, height, channels).

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build(self, hp, inputs=None):
        """Run the parent build and add a trailing axis to rank-3 tensors."""
        built = super().build(hp, inputs)
        output_node = tree.flatten(built)[0]
        if len(output_node.shape) != 3:
            return output_node
        return keras_layers.ExpandLastDim()(output_node)

    def get_adapter(self):
        adapter = adapters.ImageAdapter()
        return adapter

    def get_analyser(self):
        analyser = analysers.ImageAnalyser()
        return analyser

    def get_block(self):
        block = blocks.ImageBlock()
        return block
class TextInput(Input):
    """Input node for text data.

    The input data should be numpy.ndarray. The data should be
    one-dimensional. Each element in the data should be a string which is a
    full sentence.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create an int32 `keras.Input` with this node's shape."""
        node_shape = self.shape
        return keras.Input(shape=node_shape, dtype="int32")

    def build(self, hp, inputs=None):
        """Return the first flattened input, expanding rank-1 tensors."""
        output_node = tree.flatten(inputs)[0]
        if len(output_node.shape) != 1:
            return output_node
        output_node = keras_layers.ExpandLastDim()(  # pragma: no cover
            output_node
        )
        return output_node  # pragma: no cover

    def get_adapter(self):
        adapter = adapters.TextAdapter()
        return adapter

    def get_analyser(self):
        analyser = analysers.TextAnalyser()
        return analyser

    def get_block(self):
        block = blocks.TextBlock()
        return block

    def get_hyper_preprocessors(self):
        """Return the string-cast step followed by the tokenizer step."""
        cast_step = hyper_preprocessors.DefaultHyperPreprocessor(
            preprocessors.CastToString()
        )
        tokenize_step = hyper_preprocessors.DefaultHyperPreprocessor(
            preprocessors.TextTokenizer()
        )
        return [cast_step, tokenize_step]
class StructuredDataInput(Input):
    """Input node for structured data.

    The input data should be numpy.ndarray. The data should be
    two-dimensional with numerical or categorical values.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data. Defaults to None.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. Defaults to None. If not None, the column_names need
            to be specified. If None, it will be inferred from the data. A
            column will be judged as categorical if the number of different
            values is less than 5% of the number of instances.
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
        name: Optional[str] = None,
        **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.column_names = column_names
        self.column_types = column_types

    def get_config(self):
        """Extend the parent config with the column names and types."""
        config = super().get_config()
        config["column_names"] = self.column_names
        config["column_types"] = self.column_types
        return config

    def get_adapter(self):
        adapter = adapters.StructuredDataAdapter()
        return adapter

    def get_analyser(self):
        names, types = self.column_names, self.column_types
        return analysers.StructuredDataAnalyser(names, types)

    def get_block(self):
        block = blocks.StructuredDataBlock()
        return block

    def config_from_analyser(self, analyser):
        """Copy the column names and types from the analyser."""
        super().config_from_analyser(analyser)
        self.column_names = analyser.column_names
        # Analyser keeps the specified types and infers the missing ones.
        self.column_types = analyser.column_types

    def build(self, hp, inputs=None):
        # The inputs are passed through unchanged.
        return inputs

    def get_hyper_preprocessors(self):
        """Return the single categorical-to-numerical encoding step."""
        encoder = preprocessors.CategoricalToNumerical(
            column_names=self.column_names,
            column_types=self.column_types,
        )
        encode_step = hyper_preprocessors.DefaultHyperPreprocessor(encoder)
        return [encode_step]
================================================
FILE: autokeras/nodes_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras import blocks
from autokeras import nodes
def test_input_get_block_return_general_block():
    # The generic input node must default to a GeneralBlock.
    block = nodes.Input().get_block()

    assert isinstance(block, blocks.GeneralBlock)
================================================
FILE: autokeras/pipeline.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import tree
from autokeras import preprocessors as preprocessors_module
from autokeras.engine import hyper_preprocessor as hpps_module
from autokeras.engine import preprocessor as pps_module
from autokeras.utils import io_utils
class HyperPipeline(hpps_module.HyperPreprocessor):
    """A search space consisting of HyperPreprocessors.

    # Arguments
        inputs: a list of lists of HyperPreprocessors.
        outputs: a list of lists of HyperPreprocessors.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def _build_preprocessors(hp, hpps_lists, dataset):
        """Build and fit one chain of preprocessors per data source.

        Each hyper preprocessor is built and fitted on the data as already
        transformed by the earlier preprocessors of the same chain.
        """
        built_chains = []
        for current, hpps_list in zip(tree.flatten(dataset), hpps_lists):
            chain = []
            for hyper_preprocessor in hpps_list:
                built = hyper_preprocessor.build(hp, current)
                built.fit(current)
                current = built.transform(current)
                chain.append(built)
            built_chains.append(chain)
        return built_chains

    def build(self, hp, dataset):
        """Build a Pipeline by Hyperparameters.

        # Arguments
            hp: Hyperparameters.
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            An instance of Pipeline.
        """
        x, y = dataset
        input_chains = self._build_preprocessors(hp, self.inputs, x)
        output_chains = self._build_preprocessors(hp, self.outputs, y)
        return Pipeline(inputs=input_chains, outputs=output_chains)
def load_pipeline(filepath, custom_objects=None):
    """Load a Pipeline instance from disk.

    The JSON config at `filepath` is read and deserialized inside a Keras
    custom object scope holding `custom_objects`.
    """
    scope_objects = {} if custom_objects is None else custom_objects
    with keras.utils.custom_object_scope(scope_objects):
        config = io_utils.load_json(filepath)
        return Pipeline.from_config(config)
class Pipeline(pps_module.Preprocessor):
    """A data pipeline for transforming the entire dataset.

    # Arguments
        inputs: A list of lists of Preprocessors. For the input datasets for
            the model.
        outputs: A list of lists of Preprocessors. For the target datasets
            for the model.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    def fit(self, dataset):
        """Fit the Preprocessors.

        Every preprocessor is fitted on the data as already transformed by
        the earlier preprocessors of its chain.
        """
        x, y = dataset
        for chain, data in zip(self.inputs, tree.flatten(x)):
            for preprocessor in chain:
                preprocessor.fit(data)  # pragma: no cover
                data = preprocessor.transform(data)  # pragma: no cover
        for chain, data in zip(self.outputs, tree.flatten(y)):
            for preprocessor in chain:
                preprocessor.fit(data)
                data = preprocessor.transform(data)
        return None

    def transform(self, dataset):
        """Transform the dataset to be ready for the model.

        # Arguments
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            dataset: nested numpy arrays. The input dataset for the model.
        """
        x, y = dataset
        transformed_x = self.transform_x(x)
        transformed_y = self.transform_y(y)
        return (transformed_x, transformed_y)

    def transform_x(self, dataset):
        """Transform the input dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            dataset: nested numpy arrays. The input dataset for the model.
        """
        return self._transform_data(dataset, self.inputs)

    def transform_y(self, dataset):
        """Transform the target dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The target dataset for the model.

        # Returns
            dataset: nested numpy arrays. The target dataset for the model.
        """
        return self._transform_data(dataset, self.outputs)

    def _transform_data(self, dataset, pps_lists):
        """Run every flattened source through its chain of preprocessors.

        A single result is returned as is; several results are returned as
        a tuple.
        """
        results = []
        for chain, data in zip(pps_lists, tree.flatten(dataset)):
            for preprocessor in chain:
                data = preprocessor.transform(data)
            results.append(data)
        return results[0] if len(results) == 1 else tuple(results)

    def save(self, filepath):
        config = self.get_config()
        io_utils.save_json(filepath, config)

    def get_config(self):
        """Serialize every preprocessor of the input and output chains."""

        def serialize_chains(chains):
            return [
                [preprocessors_module.serialize(item) for item in chain]
                for chain in chains
            ]

        return {
            "inputs": serialize_chains(self.inputs),
            "outputs": serialize_chains(self.outputs),
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a Pipeline from the dict produced by `get_config`."""

        def deserialize_chains(chains):
            return [
                [preprocessors_module.deserialize(item) for item in chain]
                for chain in chains
            ]

        return cls(
            inputs=deserialize_chains(config["inputs"]),
            outputs=deserialize_chains(config["outputs"]),
        )

    def postprocess(self, y):
        """Postprocess the outputs of the model.

        # Arguments
            y: numpy.ndarray or a list of numpy.ndarrays. The output of the
                Keras model.

        # Returns
            A list or an instance of numpy.ndarray. The postprocessed data
            for the heads.
        """
        results = []
        for data, chain in zip(tree.flatten(y), self.outputs):
            # Undo the target preprocessors in reverse order of application.
            for preprocessor in reversed(chain):
                if not isinstance(preprocessor, pps_module.TargetPreprocessor):
                    continue
                data = preprocessor.postprocess(data)
            results.append(data)
        return results[0] if len(results) == 1 else results
================================================
FILE: autokeras/pipeline_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import pipeline as pipeline_module
from autokeras import preprocessors
def test_pipeline_postprocess_one_hot_to_labels():
    # One-hot rows must be decoded back to the matching labels.
    encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    pipeline = pipeline_module.Pipeline(inputs=[[]], outputs=[[encoder]])

    labels = pipeline.postprocess(np.eye(3))

    assert np.array_equal(labels, [["a"], ["b"], ["c"]])
def test_pipeline_postprocess_multiple_one_hot_to_labels():
    # Each output head is decoded by its own encoder.
    first_encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    second_encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    pipeline = pipeline_module.Pipeline(
        inputs=[[]],
        outputs=[[first_encoder], [second_encoder]],
    )
    expected = [["a"], ["b"], ["c"]]

    result = pipeline.postprocess([np.eye(3), np.eye(3)])

    assert np.array_equal(result[0], expected)
    assert np.array_equal(result[1], expected)
================================================
FILE: autokeras/preprocessors/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
from autokeras.preprocessors.common import AddOneDimension
from autokeras.preprocessors.common import CastToInt32
from autokeras.preprocessors.common import CastToString
from autokeras.preprocessors.common import TextTokenizer
from autokeras.preprocessors.encoders import CategoricalToNumerical
from autokeras.preprocessors.encoders import LabelEncoder
from autokeras.preprocessors.encoders import OneHotEncoder
from autokeras.preprocessors.postprocessors import SigmoidPostprocessor
from autokeras.preprocessors.postprocessors import SoftmaxPostprocessor
from autokeras.utils import utils
def serialize(preprocessor):
    """Serialize `preprocessor` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(preprocessor)
    return serialized
def deserialize(config, custom_objects=None):
    """Rebuild a preprocessor from `config`.

    Names are looked up among this package's globals and the optional
    `custom_objects`.
    """
    package_namespace = globals()
    return utils.deserialize_keras_object(
        config,
        module_objects=package_namespace,
        custom_objects=custom_objects,
    )
================================================
FILE: autokeras/preprocessors/common.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import Counter
import keras
import numpy as np
from autokeras.engine import preprocessor
@keras.utils.register_keras_serializable(package="autokeras")
class AddOneDimension(preprocessor.Preprocessor):
    """Append one dimension of size one to the dataset shape."""

    def transform(self, dataset):
        """Return `dataset` with a new trailing axis."""
        expanded = np.expand_dims(dataset, axis=-1)
        return expanded
@keras.utils.register_keras_serializable(package="autokeras")
class CastToInt32(preprocessor.Preprocessor):
    """Cast the dataset to int32."""

    def transform(self, dataset):
        """Return `dataset` converted with `astype("int32")`."""
        as_int32 = dataset.astype("int32")
        return as_int32
@keras.utils.register_keras_serializable(package="autokeras")
class CastToString(preprocessor.Preprocessor):
    """Cast the dataset to string."""

    def transform(self, dataset):
        """Convert the dataset to an array of Python (unicode) strings.

        # Arguments
            dataset: numpy.ndarray. The dataset to be converted.

        # Returns
            numpy.ndarray of unicode strings with the same shape as the
            input. Byte strings are decoded as UTF-8, dropping bytes that
            cannot be decoded; any other dtype is converted with
            `astype("str")`.
        """
        if np.issubdtype(dataset.dtype, np.bytes_):
            # Decode element-wise so that arrays of any rank (and empty
            # arrays) keep their shape and come back with a string dtype.
            return np.char.decode(dataset, "utf-8", errors="ignore")
        return dataset.astype("str")
@keras.utils.register_keras_serializable(package="autokeras")
class TextTokenizer(preprocessor.Preprocessor):
    """Simple text tokenizer that converts strings to integer sequences.

    # Arguments
        max_len: Int. The length of every produced sequence. Defaults to 100.
        vocab: Dict mapping words to integer ids, or None. It is replaced
            when `fit` is called. Defaults to None.
        max_vocab: Int. The maximum number of words kept in the vocabulary.
            Defaults to 500.
    """

    def __init__(self, max_len=100, vocab=None, max_vocab=500, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab = vocab
        self.max_vocab = max_vocab

    def fit(self, dataset):
        """Build the vocabulary from the most frequent words of the dataset."""
        word_counts = Counter()
        for text in dataset:
            word_counts.update(text.split())
        # Most frequent first; ties keep the order of first appearance.
        ranked_words = sorted(word_counts, key=word_counts.get, reverse=True)
        kept_words = ranked_words[: self.max_vocab]
        # Ids start from 1 because 0 is reserved for padding.
        self.vocab = {
            word: word_id for word_id, word in enumerate(kept_words, start=1)
        }

    def transform(self, dataset):
        """Encode each string as `max_len` int32 word ids.

        Words missing from the vocabulary are encoded as 0, longer texts are
        truncated and shorter ones are padded with 0.
        """
        # dataset is np.array of strings
        encoded = []
        for text in dataset:
            tokens = text.split()[: self.max_len]
            ids = [self.vocab.get(token, 0) for token in tokens]
            padding = [0] * (self.max_len - len(ids))
            encoded.append(ids + padding)
        return np.array(encoded, dtype=np.int32)

    def get_config(self):
        """Extend the parent config with the tokenizer settings and vocab."""
        config = super().get_config()
        config["max_len"] = self.max_len
        config["vocab"] = self.vocab
        config["max_vocab"] = self.max_vocab
        return config
================================================
FILE: autokeras/preprocessors/common_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import test_utils
from autokeras.preprocessors import common
def test_cast_to_int32_return_int32():
    # uint8 one-hot labels must come back as int32.
    labels = test_utils.generate_one_hot_labels(100, 10).astype("uint8")

    casted = common.CastToInt32().transform(labels)

    assert casted.dtype == "int32"
def test_cast_to_string_with_bytes():
    # Byte strings must be decoded to their text content.
    byte_data = np.array([b"hello", b"world"])

    result = common.CastToString().transform(byte_data)

    assert result.dtype.kind in ["U", "S"]  # Unicode or byte string
    first, second = result[0], result[1]
    assert first == "hello"
    assert second == "world"
def test_cast_to_string_with_strings():
    # String input must pass through with the same content.
    text_data = np.array(["hello", "world"])

    result = common.CastToString().transform(text_data)

    assert result.dtype.kind in ["U", "S"]
    first, second = result[0], result[1]
    assert first == "hello"
    assert second == "world"
def test_text_tokenizer_vocab_limit():
    # "word1" appears twice; every other word appears once.
    x = np.array(["word1 word2 word3", "word1 word4 word5"])
    tokenizer = common.TextTokenizer(max_vocab=2)
    tokenizer.fit(x)
    # The vocabulary only stores real words (0 is used for padding and
    # unknown words without an entry), so it holds exactly max_vocab words.
    assert len(tokenizer.vocab) == 2
    # Ids are assigned from 1 upwards.
    assert sorted(tokenizer.vocab.values()) == [1, 2]
    # word1 should be most frequent
    assert "word1" in tokenizer.vocab
    assert tokenizer.vocab["word1"] == 1
def test_text_tokenizer_transform():
    # Sequences are padded to the default max_len of 100 and are int32.
    texts = np.array(["hello world", "hello"])
    tokenizer = common.TextTokenizer(max_vocab=10)
    tokenizer.fit(texts)

    result = tokenizer.transform(texts)

    assert result.shape == (2, 100)  # max_len=100
    assert result.dtype == np.int32
    hello_id = tokenizer.vocab.get("hello", 0)
    assert result[0][0] == hello_id
================================================
FILE: autokeras/preprocessors/encoders.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import numpy as np
from autokeras import analysers
from autokeras.engine import preprocessor
@keras.utils.register_keras_serializable(package="autokeras")
class Encoder(preprocessor.TargetPreprocessor):
    """Transform labels to encodings.

    # Arguments
        labels: A list of labels of any type. The labels to be encoded.
            Bytes labels are decoded as UTF-8; every other label is
            converted with `str`.
    """

    def __init__(self, labels, **kwargs):
        super().__init__(**kwargs)
        self.labels = [self._to_text(label) for label in labels]

    @staticmethod
    def _to_text(label, errors="strict"):
        """Normalize one label to the text form stored in `self.labels`."""
        if isinstance(label, bytes):
            return label.decode("utf-8", errors=errors)
        return str(label)

    def get_config(self):
        return {"labels": self.labels}

    def fit(self, dataset):
        # The labels are given at construction; nothing is learned here.
        return

    def transform(self, dataset):
        """Transform labels to integer encodings.

        Values that are not among the known labels are encoded as
        `len(self.labels)`.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        unknown_idx = len(self.labels)
        # Normalize values exactly like the labels were normalized in
        # __init__, so bytes values match their decoded labels. Undecodable
        # bytes are replaced rather than raised on; they simply end up
        # encoded as unknown.
        return np.array(
            [
                label_to_idx.get(
                    self._to_text(label, errors="replace"), unknown_idx
                )
                for label in dataset.flatten()
            ]
        ).reshape(dataset.shape)
@keras.utils.register_keras_serializable(package="autokeras")
class OneHotEncoder(Encoder):
    """Transform the labels to one-hot encodings."""

    def transform(self, dataset):
        """Transform labels to one-hot encodings.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        indices = super().transform(dataset)
        num_labels = len(self.labels)
        identity = np.eye(num_labels)
        indices = indices.squeeze(axis=-1)
        one_hot = np.zeros((len(indices), num_labels))
        for row, label_idx in enumerate(indices):
            # Indices outside the label range keep an all-zero row.
            if label_idx < num_labels:
                one_hot[row] = identity[label_idx]
        return one_hot

    def postprocess(self, data):
        """Transform probabilities back to labels.

        # Arguments
            data: numpy.ndarray. The output probabilities of the
                classification head.

        # Returns
            numpy.ndarray. The original labels.
        """
        predicted = np.argmax(np.array(data), axis=1)
        decoded = [
            self.labels[idx] if idx < len(self.labels) else "unknown"
            for idx in predicted
        ]
        return np.array(decoded).reshape(-1, 1)
@keras.utils.register_keras_serializable(package="autokeras")
class LabelEncoder(Encoder):
    """Transform the labels to integer encodings."""

    def transform(self, dataset):
        """Transform labels to integer encodings.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        dataset = super().transform(dataset)
        return dataset

    def postprocess(self, data):
        """Transform predicted label indices back to labels.

        The first value of every row is rounded to the nearest integer and
        used as an index into the labels. Indices outside of
        `[0, len(labels))` are decoded as "unknown".

        # Arguments
            data: numpy.ndarray. The output of the head, one row per
                instance.

        # Returns
            numpy.ndarray. The original labels, with shape (-1, 1).
        """
        num_labels = len(self.labels)
        decoded = []
        for row in np.array(data):
            idx = int(round(row[0]))
            # The lower bound matters: a negative index would silently wrap
            # around and pick a label from the end of the list.
            if 0 <= idx < num_labels:
                decoded.append(self.labels[idx])
            else:
                decoded.append("unknown")
        return np.array(decoded).reshape(-1, 1)
@keras.utils.register_keras_serializable()
class CategoricalToNumerical(preprocessor.Preprocessor):
    """Encode the categorical features to numerical features.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. It needs an entry for every name in column_names.
    """

    # TODO: Support one-hot encoding.
    # TODO: Support frequency encoding.

    def __init__(self, column_names, column_types, **kwargs):
        super().__init__(**kwargs)
        self.column_names = column_names
        self.column_types = column_types
        # One encoding tag per column, in column order.
        self.encoding = []
        for column_name in self.column_names:
            column_type = self.column_types[column_name]
            if column_type == analysers.CATEGORICAL:
                # TODO: Search to use one-hot or int.
                self.encoding.append("int")
            else:
                self.encoding.append("none")
        # Filled in by fit(); columns tagged "none" keep None.
        self.encoders = [None] * len(self.encoding)

    def fit(self, dataset):
        """Create an encoder from the unique values of each encoded column."""
        for i, enc_type in enumerate(self.encoding):
            if enc_type == "none":
                continue
            column_data = dataset[:, i]
            unique_labels = np.unique(column_data)
            if enc_type == "int":
                self.encoders[i] = LabelEncoder(unique_labels)
            elif enc_type == "one_hot":  # pragma: no cover
                self.encoders[i] = OneHotEncoder(  # pragma: no cover
                    unique_labels
                )
            else:
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoding: {enc_type}"
                )

    def transform(self, dataset):
        """Encode the dataset column by column into one float32 array.

        Columns tagged "none" are cast to float32 with NaN replaced by 0;
        the other columns are transformed by their fitted encoder.
        """
        outputs = []
        for i, enc_type in enumerate(self.encoding):
            # Slice (not index) so every column stays two-dimensional.
            column_data = dataset[:, i : i + 1]
            if enc_type == "none":
                column_data = column_data.astype("float32")
                # Replace NaN with 0.
                imputed = np.where(np.isnan(column_data), 0, column_data)
                outputs.append(imputed)
            else:
                encoded = self.encoders[i].transform(column_data)
                outputs.append(encoded)
        return np.concatenate(outputs, axis=1).astype("float32")

    def get_config(self):
        """Return the column settings together with every encoder's config."""
        encoders_config = []
        for enc in self.encoders:
            if enc is None:
                encoders_config.append(
                    {"encoder_type": None, "encoder_config": None}
                )
            else:
                encoder_type = (
                    "label" if isinstance(enc, LabelEncoder) else "one_hot"
                )
                encoders_config.append(
                    {
                        "encoder_type": encoder_type,
                        "encoder_config": enc.get_config(),
                    }
                )
        return {
            "column_types": self.column_types,
            "column_names": self.column_names,
            "encoding": self.encoding,
            "encoders": encoders_config,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild the preprocessor from the dict produced by `get_config`.

        # Raises
            ValueError: If an entry of `config["encoders"]` has an
                unsupported `encoder_type`.
        """
        obj = cls(config["column_names"], config["column_types"])
        obj.encoding = config["encoding"]
        obj.encoders = []
        for item in config["encoders"]:
            encoder_type = item["encoder_type"]
            if encoder_type is None:
                obj.encoders.append(None)
            elif encoder_type == "label":
                obj.encoders.append(LabelEncoder(**item["encoder_config"]))
            elif encoder_type == "one_hot":  # pragma: no cover
                obj.encoders.append(  # pragma: no cover
                    OneHotEncoder(**item["encoder_config"])
                )
            else:
                # Skipping the entry would shift every later encoder to the
                # wrong column, so fail loudly instead.
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoder type: {encoder_type}"
                )
        return obj
================================================
FILE: autokeras/preprocessors/encoders_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import preprocessors
from autokeras.preprocessors import encoders
from autokeras.utils import data_utils
def test_one_hot_encoder_deserialize_transforms_to_np():
    # An encoder restored from its serialized form must still transform.
    original = encoders.OneHotEncoder(["a", "b", "c"])
    original.fit(np.array(["a", "b", "a"]))
    serialized = preprocessors.serialize(original)
    restored = preprocessors.deserialize(serialized)

    one_hot = restored.transform(np.array([["a"], ["c"], ["b"]]))

    assert one_hot.shape[1:] == (3,)
def test_one_hot_encoder_decode_to_same_string():
    # The identity matrix decodes to the labels in their original order.
    encoder = encoders.OneHotEncoder(["a", "b", "c"])
    expected = np.array([["a"], ["b"], ["c"]])

    result = encoder.postprocess(np.eye(3))

    assert np.array_equal(result, expected)
def test_label_encoder_decode_to_same_string():
    # Integer indices decode to the labels at those positions.
    encoder = encoders.LabelEncoder(["a", "b"])
    expected = np.array([["a"], ["b"]])

    result = encoder.postprocess([[0], [1]])

    assert np.array_equal(result, expected)
def test_label_encoder_encode_to_correct_shape():
    # Encoding must keep the (samples, 1) shape of the input.
    encoder = encoders.LabelEncoder(["a", "b"])
    dataset = np.array([["a"], ["b"]])
    result = encoder.transform(dataset)
    assert np.array_equal(data_utils.dataset_shape(result), [2, 1])
================================================
FILE: autokeras/preprocessors/postprocessors.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import numpy as np
from autokeras.engine import preprocessor
@keras.utils.register_keras_serializable(package="autokeras")
class PostProcessor(preprocessor.TargetPreprocessor):
    """Base class for postprocessors; `transform` leaves the data as is."""

    def transform(self, dataset):
        # Nothing is changed on the way into the model.
        unchanged = dataset
        return unchanged
@keras.utils.register_keras_serializable(package="autokeras")
class SigmoidPostprocessor(PostProcessor):
    """Postprocessor for sigmoid outputs."""

    def postprocess(self, data):
        """Transform probabilities to zeros and ones.

        Probabilities strictly greater than 0.5 become 1 and all others,
        including exactly 0.5, become 0. The input is not modified.

        # Arguments
            data: numpy.ndarray. The output probabilities of the
                classification head.

        # Returns
            numpy.ndarray. The zeros and ones predictions, as a new array
            with the same shape and dtype as the input.
        """
        data = np.asarray(data)
        # A single comparison decides every value, so 0.5 cannot slip
        # through unchanged, and the caller's array is left untouched.
        return np.where(data > 0.5, 1, 0).astype(data.dtype)
@keras.utils.register_keras_serializable(package="autokeras")
class SoftmaxPostprocessor(PostProcessor):
    """Postprocessor for softmax outputs."""

    def postprocess(self, data):
        """Transform probabilities to zeros and ones.

        # Arguments
            data: numpy.ndarray. The output probabilities of the
                classification head.

        # Returns
            numpy.ndarray. The zeros and ones predictions, with a one at the
            position of the highest probability along the last axis.
        """
        winners = np.argmax(data, axis=-1)
        one_hot = np.zeros(data.shape)
        rows = np.arange(one_hot.shape[0])
        one_hot[rows, winners] = 1
        return one_hot
================================================
FILE: autokeras/preprocessors/postprocessors_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from autokeras import preprocessors
from autokeras.preprocessors import postprocessors
def test_sigmoid_postprocess_to_zero_one():
    """Sigmoid postprocessing of random inputs yields only 0s and 1s."""
    sigmoid = postprocessors.SigmoidPostprocessor()
    probabilities = np.random.rand(10, 3)

    predictions = sigmoid.postprocess(probabilities)

    assert set(predictions.flatten().tolist()) == {0, 1}
def test_sigmoid_deserialize_without_error():
    """A serialized SigmoidPostprocessor can be restored and used."""
    original = postprocessors.SigmoidPostprocessor()
    dataset = np.array([1, 2])

    config = preprocessors.serialize(original)
    restored = preprocessors.deserialize(config)

    result = restored.transform(dataset)
    assert isinstance(result, np.ndarray)
def test_softmax_postprocess_to_zero_one():
    """Softmax postprocessing of random inputs yields only 0s and 1s."""
    softmax = postprocessors.SoftmaxPostprocessor()
    probabilities = np.random.rand(10, 3)

    predictions = softmax.postprocess(probabilities)

    assert set(predictions.flatten().tolist()) == {0, 1}
def test_softmax_transform_dataset_doesnt_change():
    """`transform` must hand the dataset back without altering its values."""
    postprocessor = postprocessors.SoftmaxPostprocessor()
    dataset = np.array([1, 2])

    result = postprocessor.transform(dataset)

    assert isinstance(result, np.ndarray)
    # Check the contents as well as the type: the type check alone would
    # still pass if `transform` returned a modified array.
    assert np.array_equal(result, np.array([1, 2]))
def test_softmax_deserialize_without_error():
    """A serialized SoftmaxPostprocessor can be restored and used."""
    original = postprocessors.SoftmaxPostprocessor()
    dataset = np.array([1, 2])

    config = preprocessors.serialize(original)
    restored = preprocessors.deserialize(config)

    result = restored.transform(dataset)
    assert isinstance(result, np.ndarray)
================================================
FILE: autokeras/tasks/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.tasks.image import ImageClassifier
from autokeras.tasks.image import ImageRegressor
from autokeras.tasks.structured_data import StructuredDataClassifier
from autokeras.tasks.structured_data import StructuredDataRegressor
from autokeras.tasks.text import TextClassifier
from autokeras.tasks.text import TextRegressor
================================================
FILE: autokeras/tasks/image.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union
import keras
from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import greedy
from autokeras.tuners import task_specific
from autokeras.utils import types
class SupervisedImagePipeline(auto_model.AutoModel):
    """AutoModel whose single input is an `ImageInput` node.

    # Arguments
        outputs: The output head(s) forwarded to `AutoModel`.
        **kwargs: Any other arguments forwarded to `AutoModel`.
    """

    def __init__(self, outputs, **kwargs):
        image_input = input_module.ImageInput()
        super().__init__(inputs=image_input, outputs=outputs, **kwargs)
class ImageClassifier(SupervisedImagePipeline):
    """AutoKeras image classification class.

    # Arguments
        num_classes: Int. The number of classes. Defaults to None, in which
            case it is inferred from the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. When left unset,
            'binary_crossentropy' or 'categorical_crossentropy' is used
            depending on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        project_name: String. The name of the AutoModel.
            Defaults to 'image_classifier'.
        max_trials: Int. The maximum number of different Keras Models to
            try. The search may finish before reaching the max_trials.
            Defaults to 100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one
            of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be
            a subclass of AutoTuner. If left unspecified, it uses a task
            specific tuner, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an
            existing project of the same name if one is found. Otherwise,
            overwrites the project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of
            a model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: types.LossType = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "image_classifier",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the task-specific tuner when the caller gave none.
        chosen_tuner = (
            task_specific.ImageClassifierTuner if tuner is None else tuner
        )
        head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=chosen_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x: Optional[types.DatasetType] = None,
        y: Optional[types.DatasetType] = None,
        epochs: Optional[int] = None,
        callbacks: Optional[List[keras.callbacks.Callback]] = None,
        validation_split: Optional[float] = 0.2,
        validation_data: Union[
            Tuple[types.DatasetType, types.DatasetType], None
        ] = None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected according to its performance on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The shape
                of the data should be (samples, width, height) or (samples,
                width, height, channels).
            y: numpy.ndarray. Training data y. It can be
                raw labels, one-hot encoded if more than two classes, or
                binary encoded for binary classification.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, by default we train for a maximum of
                1000 epochs, but we stop training if the validation loss
                stops improving for 10 epochs (unless you specified an
                EarlyStopping callback as part of the callbacks argument, in
                which case the EarlyStopping callback you specified will
                determine early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any
                model metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the `x`
                and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset. The best model found
                would be fit on the entire dataset including the validation
                data.
            validation_data: Data on which to evaluate the loss and any
                model metrics at the end of each epoch. The model will not
                be trained on this data. `validation_data` will override
                `validation_split`. The type of the validation data should
                be the same as the training data. The best model found would
                be fit on the training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training loss
                values and metrics values at successive epochs, as well as
                validation loss values and validation metrics values (if
                applicable).
        """
        # Pure delegation: this override exists to carry the task-specific
        # signature and documentation.
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
class ImageRegressor(SupervisedImagePipeline):
    """AutoKeras image regression class.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None,
            in which case it is inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use
            'mean_squared_error'.
        project_name: String. The name of the AutoModel.
            Defaults to 'image_regressor'.
        max_trials: Int. The maximum number of different Keras Models to
            try. The search may finish before reaching the max_trials.
            Defaults to 100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one
            of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be
            a subclass of AutoTuner. If left unspecified, it uses a task
            specific tuner, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an
            existing project of the same name if one is found. Otherwise,
            overwrites the project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of
            a model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "image_regressor",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # The greedy tuner is used when the caller does not pick one.
        chosen_tuner = greedy.Greedy if tuner is None else tuner
        head = blocks.RegressionHead(
            output_dim=output_dim, loss=loss, metrics=metrics
        )
        super().__init__(
            outputs=head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=chosen_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x: Optional[types.DatasetType] = None,
        y: Optional[types.DatasetType] = None,
        epochs: Optional[int] = None,
        callbacks: Optional[List[keras.callbacks.Callback]] = None,
        validation_split: Optional[float] = 0.2,
        validation_data: Union[
            types.DatasetType, Tuple[types.DatasetType], None
        ] = None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected according to its performance on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The shape
                of the data should be (samples, width, height) or (samples,
                width, height, channels).
            y: numpy.ndarray. Training data y. The targets
                passing to the head would have to be np.ndarray. It can be
                single-column or multi-column. The values should all be
                numerical.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, by default we train for a maximum of
                1000 epochs, but we stop training if the validation loss
                stops improving for 10 epochs (unless you specified an
                EarlyStopping callback as part of the callbacks argument, in
                which case the EarlyStopping callback you specified will
                determine early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any
                model metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the `x`
                and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset. The best model found
                would be fit on the entire dataset including the validation
                data.
            validation_data: Data on which to evaluate the loss and any
                model metrics at the end of each epoch. The model will not
                be trained on this data. `validation_data` will override
                `validation_split`. The type of the validation data should
                be the same as the training data. The best model found would
                be fit on the training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well
                as validation loss values and validation metrics values (if
                applicable).
        """
        # Pure delegation: this override exists to carry the task-specific
        # signature and documentation.
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
================================================
FILE: autokeras/tasks/image_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import autokeras as ak
from autokeras import test_utils
@mock.patch("autokeras.AutoModel.fit")
def test_img_clf_fit_call_auto_model_fit(fit, tmp_path):
    """`ImageClassifier.fit` must call the patched `AutoModel.fit`."""
    auto_model = ak.ImageClassifier(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=10),
    )

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert fit.called
@mock.patch("autokeras.AutoModel.fit")
def test_img_reg_fit_call_auto_model_fit(fit, tmp_path):
    """`ImageRegressor.fit` must call the patched `AutoModel.fit`."""
    auto_model = ak.ImageRegressor(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert fit.called
================================================
FILE: autokeras/tasks/structured_data.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pathlib
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union
import tree
from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import task_specific
from autokeras.utils import types
class BaseStructuredDataPipeline(auto_model.AutoModel):
    """Shared base of the structured data AutoModels.

    The column type specification of the input node is validated when the
    object is constructed, and the column names and column types are checked
    against each other before every `fit`.

    # Arguments
        inputs: The input node. Its `column_names` and `column_types`
            attributes are read during construction.
        outputs: The output head(s) forwarded to `AutoModel`.
        **kwargs: Any other arguments forwarded to `AutoModel`.
    """

    def __init__(self, inputs, outputs, **kwargs):
        # Reject invalid column types before the AutoModel is built.
        self.check(inputs.column_names, inputs.column_types)
        super().__init__(inputs=inputs, outputs=outputs, **kwargs)
        self._target_col_name = None

    def check(self, column_names, column_types):
        """Validate the values of `column_types`.

        # Arguments
            column_names: Not used by this check.
            column_types: Dict or None. Maps column names to their type.
                Nothing is checked when it is empty or None.

        # Raises
            ValueError: If a column type is neither "categorical" nor
                "numerical".
        """
        if not column_types:
            return
        allowed_types = ["categorical", "numerical"]
        for column_type in column_types.values():
            if column_type in allowed_types:
                continue
            raise ValueError(
                'column_types should be either "categorical" '
                'or "numerical", but got {name}'.format(name=column_type)
            )

    def check_in_fit(self, x):
        """Check that every typed column is also a named column.

        Only the first flattened input node is inspected, and only when both
        its `column_names` and `column_types` are set.

        # Arguments
            x: Not used by this check.

        # Raises
            ValueError: If a key of `column_types` is missing from
                `column_names`.
        """
        node = tree.flatten(self.inputs)[0]
        if not (node.column_names and node.column_types):
            return
        for column_name in node.column_types:
            if column_name in node.column_names:
                continue
            raise ValueError(
                "column_names and column_types are "
                "mismatched. Cannot find column name "
                "{name} in the data.".format(name=column_name)
            )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        # Arguments
            x: numpy.ndarray.
                Training data x. It can be string or numerical data.
            y: numpy.ndarray. Training data y.
                It can be raw labels, one-hot encoded if more than two
                classes, or binary encoded for binary classification.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, we would use epochs equal to 1000
                and early stopping with patience equal to 10.
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any
                model metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the `x`
                and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate the loss and any
                model metrics at the end of each epoch. The model will not
                be trained on this data. `validation_data` will override
                `validation_split`. The type of the validation data should
                be the same as the training data. The best model found would
                be fit on the training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well
                as validation loss values and validation metrics values (if
                applicable).
        """
        # Fail on inconsistent column specs before starting the search.
        self.check_in_fit(x)
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )

    def predict(self, x, **kwargs):
        """Predict the output for a given testing data.

        # Arguments
            x: numpy.ndarray.
                Testing data x. It can be string or numerical data.
            **kwargs: Any arguments supported by keras.Model.predict.

        # Returns
            A list of numpy.ndarray objects or a single numpy.ndarray.
            The predicted results.
        """
        predictions = super().predict(x=x, **kwargs)
        return predictions

    def evaluate(self, x, y=None, **kwargs):
        """Evaluate the best model for the given data.

        # Arguments
            x: numpy.ndarray.
                Testing data x. It can be string or numerical data.
            y: numpy.ndarray. Testing data y.
                It can be string labels or numerical values.
            **kwargs: Any arguments supported by keras.Model.evaluate.

        # Returns
            Scalar test loss (if the model has a single output and no
            metrics) or list of scalars (if the model has multiple outputs
            and/or metrics). The attribute model.metrics_names will give you
            the display labels for the scalar outputs.
        """
        results = super().evaluate(x=x, y=y, **kwargs)
        return results
class SupervisedStructuredDataPipeline(BaseStructuredDataPipeline):
    """Structured data pipeline that builds its own `StructuredDataInput`.

    # Arguments
        outputs: The output head(s) forwarded to the base pipeline.
        column_names: Passed to `StructuredDataInput` as `column_names`.
        column_types: Passed to `StructuredDataInput` as `column_types`.
        **kwargs: Any other arguments forwarded to the base pipeline.
    """

    def __init__(self, outputs, column_names, column_types, **kwargs):
        structured_input = input_module.StructuredDataInput(
            column_names=column_names,
            column_types=column_types,
        )
        super().__init__(inputs=structured_input, outputs=outputs, **kwargs)
class StructuredDataClassifier(SupervisedStructuredDataPipeline):
    """AutoKeras structured data classification class.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data excluding the target column. Defaults to None.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. Defaults to None. If not None, the column_names
            need to be specified. If None, it will be inferred from the
            data.
        num_classes: Int. The number of classes. Defaults to None, in which
            case it is inferred from the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. When left unset,
            'binary_crossentropy' or 'categorical_crossentropy' is used
            depending on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        project_name: String. The name of the AutoModel. Defaults to
            'structured_data_classifier'.
        max_trials: Int. The maximum number of different Keras Models to
            try. The search may finish before reaching the max_trials.
            Defaults to 100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize. Defaults to 'val_accuracy'.
        tuner: String or subclass of AutoTuner. If string, it should be one
            of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be
            a subclass of AutoTuner. If left unspecified, it uses a task
            specific tuner, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an
            existing project of the same name if one is found. Otherwise,
            overwrites the project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of
            a model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict] = None,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "structured_data_classifier",
        max_trials: int = 100,
        directory: Optional[Union[str, pathlib.Path]] = None,
        objective: str = "val_accuracy",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the task-specific tuner when the caller gave none.
        chosen_tuner = (
            task_specific.StructuredDataClassifierTuner
            if tuner is None
            else tuner
        )
        head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=head,
            column_names=column_names,
            column_types=column_types,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=chosen_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        # Arguments
            x: numpy.ndarray. Training data x.
            y: numpy.ndarray. Training data y.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, we would use epochs equal to 1000
                and early stopping with patience equal to 10.
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any
                model metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the `x`
                and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate the loss and any
                model metrics at the end of each epoch. The model will not
                be trained on this data. `validation_data` will override
                `validation_split`. The type of the validation data should
                be the same as the training data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well
                as validation loss values and validation metrics values (if
                applicable).
        """
        # Pure delegation to the base pipeline's `fit`.
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
class StructuredDataRegressor(SupervisedStructuredDataPipeline):
    """AutoKeras structured data regression class.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data excluding the target column. Defaults to None.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. Defaults to None. If not None, the column_names
            need to be specified. If None, it will be inferred from the
            data.
        output_dim: Int. The number of output dimensions. Defaults to None,
            in which case it is inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use
            'mean_squared_error'.
        project_name: String. The name of the AutoModel. Defaults to
            'structured_data_regressor'.
        max_trials: Int. The maximum number of different Keras Models to
            try. The search may finish before reaching the max_trials.
            Defaults to 100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one
            of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be
            a subclass of AutoTuner. If left unspecified, it uses a task
            specific tuner, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an
            existing project of the same name if one is found. Otherwise,
            overwrites the project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of
            a model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "structured_data_regressor",
        max_trials: int = 100,
        directory: Union[str, pathlib.Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the task-specific tuner when the caller gave none.
        chosen_tuner = (
            task_specific.StructuredDataRegressorTuner
            if tuner is None
            else tuner
        )
        head = blocks.RegressionHead(
            output_dim=output_dim, loss=loss, metrics=metrics
        )
        super().__init__(
            outputs=head,
            column_names=column_names,
            column_types=column_types,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=chosen_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )
================================================
FILE: autokeras/tasks/structured_data_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import numpy as np
import pandas as pd
import pytest
import autokeras as ak
from autokeras import test_utils
def test_raise_error_unknown_str_in_col_type(tmp_path):
    """An unknown column type string is rejected at construction time."""
    # "num" is neither "categorical" nor "numerical".
    bad_column_types = {"age": "num", "parch": "categorical"}

    with pytest.raises(ValueError) as excinfo:
        ak.StructuredDataClassifier(
            column_types=bad_column_types,
            directory=tmp_path,
            seed=test_utils.SEED,
        )

    message = str(excinfo.value)
    assert 'column_types should be either "categorical"' in message
def test_structured_data_input_name_type_mismatch_error(tmp_path):
    """Typed columns that are missing from column_names raise an error."""
    # "_age" and "parch" have a type but do not appear in the column names.
    column_types = {"_age": "numerical", "parch": "categorical"}
    column_names = ["age", "fare"]

    with pytest.raises(ValueError) as excinfo:
        classifier = ak.StructuredDataClassifier(
            column_types=column_types,
            column_names=column_names,
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        classifier.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")

    message = str(excinfo.value)
    assert "column_names and column_types are mismatched." in message
def test_structured_data_col_type_no_name_error(tmp_path):
    """Giving column_types without column_names raises an error."""
    column_types = {"age": "numerical", "parch": "categorical"}

    with pytest.raises(ValueError) as excinfo:
        classifier = ak.StructuredDataClassifier(
            column_types=column_types,
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        classifier.fit(x=np.random.rand(100, 30), y=np.random.rand(100, 1))

    message = str(excinfo.value)
    assert "column_names must be specified" in message
@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.evaluate")
def test_structured_clf_evaluate_call_automodel_evaluate(
    evaluate, fit, tmp_path
):
    """`StructuredDataClassifier.evaluate` must call `AutoModel.evaluate`."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.evaluate(x=test_utils.TRAIN_CSV_PATH, y="survived")

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert evaluate.called
@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.predict")
def test_structured_clf_predict_csv_call_automodel_predict(
    predict, fit, tmp_path
):
    """`StructuredDataClassifier.predict` must call `AutoModel.predict`."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.predict(x=test_utils.TEST_CSV_PATH)

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert predict.called
@mock.patch("autokeras.AutoModel.fit")
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    """`StructuredDataClassifier.fit` must call the patched `AutoModel.fit`."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(
        x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=3),
    )

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert fit.called
@mock.patch("autokeras.AutoModel.fit")
def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path):
    """`StructuredDataRegressor.fit` must call the patched `AutoModel.fit`."""
    auto_model = ak.StructuredDataRegressor(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(
        x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # `Mock.called` is the real call flag. `is_called` is not part of the
    # mock API: reading it only creates a truthy child mock, so asserting on
    # it could never fail.
    assert fit.called
================================================
FILE: autokeras/tasks/text.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import Optional
from typing import Type
from typing import Union
from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import greedy
from autokeras.tuners import task_specific
from autokeras.utils import types
class SupervisedTextPipeline(auto_model.AutoModel):
    """AutoModel whose single input is a `TextInput` node.

    # Arguments
        outputs: The output head(s) forwarded to `AutoModel`.
        **kwargs: Any other arguments forwarded unchanged to `AutoModel`.
    """

    def __init__(self, outputs, **kwargs):
        text_input = input_module.TextInput()
        super().__init__(inputs=text_input, outputs=outputs, **kwargs)
class TextClassifier(SupervisedTextPipeline):
    """Text classification task of AutoKeras.

    # Arguments
        num_classes: Int. Number of classes. Defaults to None, in which case
            it is inferred from the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. By default 'binary_crossentropy' or
            'categorical_crossentropy' is picked depending on the number of
            classes.
        metrics: A list of Keras metrics. 'accuracy' is used by default.
        project_name: String. Name of the AutoModel.
            Defaults to 'text_classifier'.
        max_trials: Int. Upper bound on the number of different Keras Models
            to try. The search may end before max_trials is reached.
            Defaults to 100.
        directory: String. Path of the directory where the search outputs are
            stored. Defaults to None, which creates a folder named after the
            AutoModel in the current directory.
        objective: String. Name of the model metric to minimize or maximize,
            e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. As a string it must be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. A subclass of
            AutoTuner is accepted as well. When unspecified, a task specific
            tuner is used, which evaluates the most commonly used models for
            the task first and explores other models afterwards.
        overwrite: Boolean. Defaults to `False`. With `False`, an existing
            project of the same name is reloaded if one is found. Otherwise
            the project is overwritten.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Larger models are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: types.LossType = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "text_classifier",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the task specific tuner when the caller gave none.
        selected_tuner = (
            task_specific.TextClassifierTuner if tuner is None else tuner
        )
        classification_head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=classification_head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected according to its performance on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. It should be a one dimensional
                numpy.ndarray in which every element is a string holding a
                full sentence.
            y: numpy.ndarray. Training data y. It may be raw labels, one-hot
                encoded labels if there are more than two classes, or binary
                encoded labels for binary classification.
            epochs: Int. Number of epochs to train each model during the
                search. If unspecified, training runs for at most 1000
                epochs and stops once the validation loss has not improved
                for 10 epochs (unless an EarlyStopping callback is given in
                the callbacks argument, in which case that callback
                determines early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data used as validation data. This
                fraction is set apart, is not trained on, and is used to
                evaluate the loss and any model metrics at the end of each
                epoch. The validation data is taken from the last samples of
                the provided `x` and `y`, before shuffling. Not supported
                when `x` is a dataset. The best model found would be fit on
                the entire dataset including the validation data.
            validation_data: Data on which the loss and any model metrics
                are evaluated at the end of each epoch. The model is not
                trained on it. `validation_data` overrides
                `validation_split`. Its type should match the type of the
                training data. The best model found would be fit on the
                training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records training loss and
                metrics values at successive epochs, as well as validation
                loss and validation metrics values (if applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
class TextRegressor(SupervisedTextPipeline):
    """Text regression task of AutoKeras.

    # Arguments
        output_dim: Int. Number of output dimensions. Defaults to None, in
            which case it is inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use
            'mean_squared_error'.
        project_name: String. Name of the AutoModel.
            Defaults to 'text_regressor'.
        max_trials: Int. Upper bound on the number of different Keras Models
            to try. The search may end before max_trials is reached.
            Defaults to 100.
        directory: String. Path of the directory where the search outputs are
            stored. Defaults to None, which creates a folder named after the
            AutoModel in the current directory.
        objective: String. Name of the model metric to minimize or maximize,
            e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. As a string it must be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. A subclass of
            AutoTuner is accepted as well. When unspecified, a task specific
            tuner is used, which evaluates the most commonly used models for
            the task first and explores other models afterwards.
        overwrite: Boolean. Defaults to `False`. With `False`, an existing
            project of the same name is reloaded if one is found. Otherwise
            the project is overwritten.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Larger models are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "text_regressor",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the greedy tuner when the caller gave none.
        selected_tuner = greedy.Greedy if tuner is None else tuner
        regression_head = blocks.RegressionHead(
            output_dim=output_dim, loss=loss, metrics=metrics
        )
        super().__init__(
            outputs=regression_head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected according to its performance on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. It should be a one dimensional
                numpy.ndarray in which every element is a string holding a
                full sentence.
            y: numpy.ndarray. Training data y. The targets passed to the head
                have to be np.ndarray. They can be single-column or
                multi-column, and all values should be numerical.
            epochs: Int. Number of epochs to train each model during the
                search. If unspecified, training runs for at most 1000
                epochs and stops once the validation loss has not improved
                for 10 epochs (unless an EarlyStopping callback is given in
                the callbacks argument, in which case that callback
                determines early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data used as validation data. This
                fraction is set apart, is not trained on, and is used to
                evaluate the loss and any model metrics at the end of each
                epoch. The validation data is taken from the last samples of
                the provided `x` and `y`, before shuffling. Not supported
                when `x` is a dataset. The best model found would be fit on
                the entire dataset including the validation data.
            validation_data: Data on which the loss and any model metrics
                are evaluated at the end of each epoch. The model is not
                trained on it. `validation_data` overrides
                `validation_split`. Its type should match the type of the
                training data. The best model found would be fit on the
                training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records training loss and
                metrics values at successive epochs, as well as validation
                loss and validation metrics values (if applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
================================================
FILE: autokeras/tasks/text_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import numpy as np
import autokeras as ak
from autokeras import test_utils
@mock.patch("autokeras.AutoModel.fit")
def test_txt_clf_fit_call_auto_model_fit(fit, tmp_path):
    """`TextClassifier.fit` must delegate to the patched `AutoModel.fit`."""
    auto_model = ak.TextClassifier(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1, 2]))

    # `Mock` has no `is_called` attribute: reading it silently creates a
    # truthy child mock, so the old assertion could never fail. `called` is
    # the real boolean flag recorded by the mock.
    assert fit.called
@mock.patch("autokeras.AutoModel.fit")
def test_txt_reg_fit_call_auto_model_fit(fit, tmp_path):
    """`TextRegressor.fit` must delegate to the patched `AutoModel.fit`."""
    auto_model = ak.TextRegressor(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1.0, 2.0]))

    # `Mock` has no `is_called` attribute: reading it silently creates a
    # truthy child mock, so the old assertion could never fail. `called` is
    # the real boolean flag recorded by the mock.
    assert fit.called
================================================
FILE: autokeras/test_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import os
import keras
import numpy as np
import autokeras as ak
SEED = 5
COLUMN_NAMES = [
"sex",
"age",
"n_siblings_spouses",
"parch",
"fare",
"class",
"deck",
"embark_town",
"alone",
]
COLUMN_TYPES = {
"sex": "categorical",
"age": "numerical",
"n_siblings_spouses": "categorical",
"parch": "categorical",
"fare": "numerical",
"class": "categorical",
"deck": "categorical",
"embark_town": "categorical",
"alone": "categorical",
}
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# Use pre-split CSVs stored under benchmark/datasets to avoid downloads and
# processing at import time.
TRAIN_CSV_PATH = os.path.join(
ROOT_DIR, "benchmark", "datasets", "titanic_train.csv"
)
TEST_CSV_PATH = os.path.join(
ROOT_DIR, "benchmark", "datasets", "titanic_test.csv"
)
def generate_data(num_instances=100, shape=(32, 32, 3)):
    """Return uniform random data of shape `(num_instances,) + shape`.

    The global NumPy random state is re-seeded with `SEED` on every call, so
    repeated calls with the same arguments return the same values. float64
    output is converted to float32.
    """
    np.random.seed(SEED)
    full_shape = (num_instances,) + shape
    samples = np.random.rand(*full_shape)
    return samples.astype(np.float32) if samples.dtype == np.float64 else samples
def generate_one_hot_labels(num_instances=100, num_classes=10):
    """Return one-hot encoded random labels for `num_instances` samples.

    The global NumPy random state is re-seeded with `SEED` before integer
    class ids in `[0, num_classes)` are drawn; the ids are then encoded with
    `keras.utils.to_categorical`.
    """
    np.random.seed(SEED)
    class_ids = np.random.randint(num_classes, size=num_instances)
    return keras.utils.to_categorical(class_ids, num_classes=num_classes)
def generate_text_data(num_instances=100):
    """Return an array of `num_instances` random four-word sentences.

    Each sentence takes one randomly chosen word from each of the four
    vocabulary rows, in row order, joined by single spaces. The global NumPy
    random state is used as-is (it is not re-seeded here).
    """
    word_rows = np.array(
        [
            ["adorable", "clueless", "dirty", "odd", "stupid"],
            ["puppy", "car", "rabbit", "girl", "monkey"],
            ["runs", "hits", "jumps", "drives", "barfs"],
            [
                "crazily.",
                "dutifully.",
                "foolishly.",
                "merrily.",
                "occasionally.",
            ],
        ]
    )

    sentences = []
    for _ in range(num_instances):
        words = []
        # One draw per vocabulary row, in row order.
        for row_index in range(4):
            words.append(word_rows[row_index][np.random.randint(0, 5)])
        sentences.append(" ".join(words))
    return np.array(sentences)
def build_graph():
    """Build a small image-classification `Graph` for use in tests.

    The graph connects a 32x32x3 `ImageInput` through a `SpatialReduction`
    block to a 10-class `ClassificationHead`. The Keras session is cleared
    first.
    """
    keras.backend.clear_session()

    input_node = ak.ImageInput(shape=(32, 32, 3))
    input_node.batch_size = 32
    input_node.num_samples = 1000

    reduced = ak.SpatialReduction()(input_node)
    output_node = ak.ClassificationHead(num_classes=10, shape=(10,))(reduced)

    return ak.graph.Graph(inputs=input_node, outputs=output_node)
def get_func_args(func):
    """Return the set of parameter names of `func`.

    The names "self", "args" and "kwargs" are left out of the result.
    """
    ignored = {"self", "args", "kwargs"}
    return {
        name
        for name in inspect.signature(func).parameters
        if name not in ignored
    }
def get_object_detection_data():
    """Return two random images with random box/class-id label pairs.

    The first image gets 3 boxes and the second gets 5. Each label is a
    `(bbox, class_id)` tuple where `bbox` has shape `(count, 4)` and
    `class_id` has shape `(count,)`; the labels are packed into an object
    array.
    """
    images = generate_data(num_instances=2, shape=(32, 32, 3))

    annotations = []
    # Draw order per image: boxes first, then class ids.
    for box_count in (3, 5):
        boxes = np.random.rand(box_count, 4)
        class_ids = np.random.rand(box_count)
        annotations.append((boxes, class_ids))

    labels = np.array([annotations[0], annotations[1]], dtype=object)
    return images, labels
def generate_data_with_categorical(
    num_instances=100,
    num_numerical=10,
    num_categorical=3,
    num_classes=5,
):
    """Return random tabular data with numerical and categorical columns.

    The result has `num_numerical` uniform random columns followed by
    `num_categorical` columns of integer ids in `[0, num_classes)`. float64
    output is converted to float32. The global NumPy random state is used
    as-is (it is not re-seeded here).
    """
    # The categorical ids are drawn before the numerical values.
    categorical_part = np.random.randint(
        num_classes, size=(num_instances, num_categorical)
    )
    numerical_part = np.random.rand(num_instances, num_numerical)

    combined = np.concatenate([numerical_part, categorical_part], axis=1)
    return (
        combined.astype(np.float32)
        if combined.dtype == np.float64
        else combined
    )
================================================
FILE: autokeras/tuners/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.tuners.bayesian_optimization import BayesianOptimization
from autokeras.tuners.greedy import Greedy
from autokeras.tuners.hyperband import Hyperband
from autokeras.tuners.random_search import RandomSearch
from autokeras.tuners.task_specific import ImageClassifierTuner
================================================
FILE: autokeras/tuners/bayesian_optimization.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras_tuner
from autokeras.engine import tuner as tuner_module
class BayesianOptimization(
    keras_tuner.BayesianOptimization, tuner_module.AutoTuner
):
    """KerasTuner BayesianOptimization with preprocessing layer tuning.

    Adds no members of its own; all behavior comes from the two base classes.
    """
================================================
FILE: autokeras/tuners/greedy.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
import keras_tuner
import numpy as np
from autokeras.engine import tuner as tuner_module
class TrieNode:
    """A node of the hyperparameter-name trie."""

    def __init__(self):
        super().__init__()
        # Counter that `Trie.insert` increments along the inserted path.
        self.num_leaves = 0
        # Maps a name component to the child `TrieNode`.
        self.children = {}
        # Full hyperparameter name ending at this node, or None.
        self.hp_name = None

    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.children
class Trie:
    """Trie over hyperparameter names split on "/"."""

    def __init__(self):
        super().__init__()
        self.root = TrieNode()

    def insert(self, hp_name):
        """Insert `hp_name`, creating one node per "/"-separated component.

        When at least one new node had to be created, `num_leaves` is
        incremented on every node of the path, the root included.
        """
        path = [self.root]
        created_node = False
        for component in hp_name.split("/"):
            parent = path[-1]
            if component not in parent.children:
                parent.children[component] = TrieNode()
                created_node = True
            path.append(parent.children[component])

        path[-1].hp_name = hp_name

        if created_node:
            for visited in path:
                visited.num_leaves += 1

    @property
    def nodes(self):
        """All nodes of the trie, the root first."""
        return self._get_all_nodes(self.root)

    def _get_all_nodes(self, node):
        """Return `node` followed by all of its descendants (pre-order)."""
        collected = [node]
        for child in node.children.values():
            collected.extend(self._get_all_nodes(child))
        return collected

    def get_hp_names(self, node):
        """Return the `hp_name` of every leaf in the subtree rooted at `node`."""
        if node.is_leaf():
            return [node.hp_name]
        names = []
        for child in node.children.values():
            names.extend(self.get_hp_names(child))
        return names
class GreedyOracle(keras_tuner.Oracle):
    """An oracle combining random search and greedy algorithm.

    The HyperParameters are grouped into several categories, namely
    HyperGraph, Preprocessor, Architecture, and Optimization, and every group
    is tuned separately with random search. In each trial a greedy strategy
    generates new values for one category of HyperParameters while the values
    of the best trial so far are reused for the remaining HyperParameters.

    # Arguments
        initial_hps: A list of dictionaries in the form of
            {HyperParameter name (String): HyperParameter value}.
            Each dictionary is one set of HyperParameters, which are used as
            the initial trials for the search. Defaults to None.
        seed: Int. Random seed.
    """

    def __init__(self, initial_hps=None, seed=None, **kwargs):
        super().__init__(seed=seed, **kwargs)
        # Work on a private copy so the caller's list is never mutated.
        self.initial_hps = copy.deepcopy(initial_hps) or []
        # One flag per initial configuration: has it been handed out yet?
        self._tried_initial_hps = [False] * len(self.initial_hps)

    def get_state(self):
        """Return the parent state extended with the initial-hps bookkeeping."""
        state = super().get_state()
        state.update(
            initial_hps=self.initial_hps,
            tried_initial_hps=self._tried_initial_hps,
        )
        return state

    def set_state(self, state):
        """Restore the parent state and the initial-hps bookkeeping."""
        super().set_state(state)
        self.initial_hps = state["initial_hps"]
        self._tried_initial_hps = state["tried_initial_hps"]

    def _select_hps(self):
        """Randomly pick a trie node and return the hp names beneath it.

        Only active, non-`Fixed` hyperparameters of the best trial are put
        in the trie. Each node is drawn with probability proportional to
        `1 / num_leaves`. An empty list is returned when the trie holds
        nothing but its root.
        """
        trie = Trie()
        best_hps = self._get_best_hps()
        for hp in best_hps.space:
            if not best_hps.is_active(hp):
                continue
            # Not picking the fixed hps for generating new values.
            if isinstance(hp, keras_tuner.engine.hyperparameters.Fixed):
                continue
            trie.insert(hp.name)

        candidates = trie.nodes
        if len(candidates) <= 1:
            return []

        weights = np.array([1 / node.num_leaves for node in candidates])
        weights = weights / np.sum(weights)
        chosen = np.random.choice(candidates, p=weights)
        return trie.get_hp_names(chosen)

    def _next_initial_hps(self):
        """Return the first untried initial configuration and mark it tried.

        Returns None when every initial configuration was already used.
        """
        for index, hps in enumerate(self.initial_hps):
            if self._tried_initial_hps[index]:
                continue
            self._tried_initial_hps[index] = True
            return hps
        return None

    def populate_space(self, trial_id):
        """Produce the status and hyperparameter values for the next trial.

        Untried initial configurations are served first. After that, up to
        `_max_collisions` greedy attempts are made to generate values; if
        none succeeds the trial is reported as STOPPED with no values.
        """
        if not all(self._tried_initial_hps):
            return {
                "status": keras_tuner.engine.trial.TrialStatus.RUNNING,
                "values": self._next_initial_hps(),
            }

        for _ in range(self._max_collisions):
            selected_names = self._select_hps()
            values = self._generate_hp_values(selected_names)
            # None means this attempt reached max collisions; try again.
            if values is not None:
                return {
                    "status": keras_tuner.engine.trial.TrialStatus.RUNNING,
                    "values": values,
                }

        # All stages reached max collisions.
        return {
            "status": keras_tuner.engine.trial.TrialStatus.STOPPED,
            "values": None,
        }

    def _get_best_hps(self):
        """Return a copy of the best trial's hps, or of the oracle's own."""
        best_trials = self.get_best_trials()
        source = (
            best_trials[0].hyperparameters
            if best_trials
            else self.hyperparameters
        )
        return source.copy()

    def _generate_hp_values(self, hp_names):
        """Generate values that re-sample only the hps listed in `hp_names`.

        Active hps that were also active in the best trial and are not in
        `hp_names` keep the best trial's value; every other active hp is
        randomly sampled. Returns None after too many collisions with value
        sets that were already tried.
        """
        best_hps = self._get_best_hps()
        collisions = 0

        while True:
            candidate = keras_tuner.HyperParameters()
            # Generate a set of random values.
            for hp in self.hyperparameters.space:
                candidate.merge([hp])
                # Inactive hps are left untouched.
                if not candidate.is_active(hp):
                    continue
                keep_best = (
                    best_hps.is_active(hp.name) and hp.name not in hp_names
                )
                if keep_best:
                    # Was active and not selected: reuse the best value.
                    candidate.values[hp.name] = best_hps.values[hp.name]
                else:
                    # Was not active, or was selected: sample a new value.
                    candidate.values[hp.name] = hp.random_sample(
                        self._seed_state
                    )
                    self._seed_state += 1

            values = candidate.values
            # Keep trying until the set of values is unique,
            # or until we exit due to too many collisions.
            values_hash = self._compute_values_hash(values)
            if values_hash not in self._tried_so_far:
                self._tried_so_far.add(values_hash)
                return values

            collisions += 1
            if collisions > self._max_collisions:
                return None
class Greedy(tuner_module.AutoTuner):
    """AutoTuner whose search is driven by a `GreedyOracle`.

    # Arguments
        hypermodel: The `keras_tuner.HyperModel` forwarded to the tuner.
        objective: String. Forwarded to the oracle. Defaults to 'val_loss'.
        max_trials: Int. Forwarded to the oracle. Defaults to 10.
        initial_hps: Optional list of {name: value} dictionaries forwarded
            to the oracle as `initial_hps`.
        seed: Optional int. Stored on the tuner and forwarded to the oracle.
        hyperparameters: Optional `keras_tuner.HyperParameters` forwarded to
            the oracle.
        tune_new_entries: Boolean. Forwarded to the oracle.
        allow_new_entries: Boolean. Forwarded to the oracle.
        **kwargs: Forwarded to `AutoTuner`.
    """

    def __init__(
        self,
        hypermodel: keras_tuner.HyperModel,
        objective: str = "val_loss",
        max_trials: int = 10,
        initial_hps: Optional[List[Dict[str, Any]]] = None,
        seed: Optional[int] = None,
        hyperparameters: Optional[keras_tuner.HyperParameters] = None,
        tune_new_entries: bool = True,
        allow_new_entries: bool = True,
        **kwargs
    ):
        self.seed = seed
        oracle_options = dict(
            objective=objective,
            max_trials=max_trials,
            initial_hps=initial_hps,
            seed=seed,
            hyperparameters=hyperparameters,
            tune_new_entries=tune_new_entries,
            allow_new_entries=allow_new_entries,
        )
        greedy_oracle = GreedyOracle(**oracle_options)
        super().__init__(oracle=greedy_oracle, hypermodel=hypermodel, **kwargs)
================================================
FILE: autokeras/tuners/greedy_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import keras
import keras_tuner
import autokeras as ak
from autokeras import test_utils
from autokeras.tuners import greedy
from autokeras.tuners import task_specific
def test_greedy_oracle_get_state_update_space_can_run():
    """Smoke test: state round-trip and `update_space` run without error."""
    oracle = greedy.GreedyOracle(objective="val_loss")

    saved_state = oracle.get_state()
    oracle.set_state(saved_state)

    space = keras_tuner.HyperParameters()
    space.Boolean("test")
    oracle.update_space(space)
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_different_values(get_best_trials):
    """Two consecutive `populate_space` calls must not yield equal values."""
    hp = keras_tuner.HyperParameters()
    test_utils.build_graph().build(hp)

    best_trial = mock.Mock()
    best_trial.hyperparameters = hp
    get_best_trials.return_value = [best_trial]

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    oracle.update_space(hp)

    values_a = oracle.populate_space("a")["values"]
    values_b = oracle.populate_space("b")["values"]

    matches = [values_a[key] == values_b[key] for key in values_a]
    assert not all(matches)
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_doesnt_crash_with_init_hps(get_best_trials):
    """Smoke test: ten populate/build/update rounds with initial hps run."""
    hp = keras_tuner.HyperParameters()
    keras.backend.clear_session()

    input_node = ak.ImageInput(shape=(32, 32, 3))
    input_node.batch_size = 32
    input_node.num_samples = 1000
    features = ak.ImageBlock()(input_node)
    head = ak.ClassificationHead(num_classes=10)
    head.shape = (10,)
    output_node = head(features)
    graph = ak.graph.Graph(inputs=input_node, outputs=output_node)
    graph.build(hp)

    oracle = greedy.GreedyOracle(
        initial_hps=task_specific.IMAGE_CLASSIFIER,
        objective="val_loss",
        seed=test_utils.SEED,
    )
    # The mocked best trial keeps pointing at the first `hp` object.
    best_trial = mock.Mock()
    best_trial.hyperparameters = hp
    get_best_trials.return_value = [best_trial]

    for _ in range(10):
        keras.backend.clear_session()
        values = oracle.populate_space("a")["values"]
        round_hp = oracle.hyperparameters.copy()
        round_hp.values = values
        graph.build(round_hp)
        oracle.update_space(round_hp)
@mock.patch("autokeras.tuners.greedy.GreedyOracle._compute_values_hash")
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_stop_reach_max_collision(
    get_best_trials, compute_values_hash
):
    """With a constant values hash, the second trial must be STOPPED."""
    hp = keras_tuner.HyperParameters()
    test_utils.build_graph().build(hp)

    best_trial = mock.Mock()
    best_trial.hyperparameters = hp
    get_best_trials.return_value = [best_trial]
    # Every generated value set hashes to the same value.
    compute_values_hash.return_value = 1

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    oracle.update_space(hp)

    # The first call consumes the only available hash.
    _ = oracle.populate_space("a")["values"]

    second_status = oracle.populate_space("b")["status"]
    assert second_status == keras_tuner.engine.trial.TrialStatus.STOPPED
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_space_with_no_hp(get_best_trials):
    """An empty search space must produce an empty set of values."""
    empty_hp = keras_tuner.HyperParameters()

    best_trial = mock.Mock()
    best_trial.hyperparameters = empty_hp
    get_best_trials.return_value = [best_trial]

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    oracle.update_space(empty_hp)

    values = oracle.populate_space("a")["values"]
    assert len(values) == 0
================================================
FILE: autokeras/tuners/hyperband.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras_tuner
from autokeras.engine import tuner as tuner_module
class Hyperband(keras_tuner.Hyperband, tuner_module.AutoTuner):
    """KerasTuner Hyperband with preprocessing layer tuning.

    # Arguments
        max_epochs: Int. Forwarded to the parent constructor as
            `max_epochs`. Defaults to 1000.
        max_trials: Int. Assigned to `self.oracle.max_trials` after the
            parent constructor has run. Defaults to 100.
        *args, **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(
        self, max_epochs: int = 1000, max_trials: int = 100, *args, **kwargs
    ):
        super().__init__(*args, max_epochs=max_epochs, **kwargs)
        # Set on the oracle directly rather than passed to the constructor.
        self.oracle.max_trials = max_trials
================================================
FILE: autokeras/tuners/hyperband_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/tuners/random_search.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras_tuner
from autokeras.engine import tuner as tuner_module
class RandomSearch(keras_tuner.RandomSearch, tuner_module.AutoTuner):
    """KerasTuner RandomSearch with preprocessing layer tuning.

    Adds no members of its own; all behavior comes from the two base classes.
    """
================================================
FILE: autokeras/tuners/random_search_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/tuners/task_specific.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from autokeras.tuners import greedy
# Initial hyperparameter value sets for image classification. The list is
# handed to `greedy.Greedy` as `initial_hps` by `ImageClassifierTuner`.
# Each dict maps a hyperparameter name to the value to start from.
IMAGE_CLASSIFIER = [
# Entry 0: "vanilla" block type, no augmentation, one conv block.
{
"image_block_1/block_type": "vanilla",
"image_block_1/normalize": True,
"image_block_1/augment": False,
"image_block_1/conv_block_1/kernel_size": 3,
"image_block_1/conv_block_1/num_blocks": 1,
"image_block_1/conv_block_1/num_layers": 2,
"image_block_1/conv_block_1/max_pooling": True,
"image_block_1/conv_block_1/separable": False,
"image_block_1/conv_block_1/dropout": 0.25,
"image_block_1/conv_block_1/filters_0_0": 32,
"image_block_1/conv_block_1/filters_0_1": 64,
"classification_head_1/spatial_reduction_1/reduction_type": "flatten",
"classification_head_1/dropout": 0.5,
"optimizer": "adam",
"learning_rate": 1e-3,
},
# Entry 1: "resnet" block type (resnet50, not pretrained) with augmentation.
{
"image_block_1/block_type": "resnet",
"image_block_1/normalize": True,
"image_block_1/augment": True,
"image_block_1/image_augmentation_1/horizontal_flip": True,
"image_block_1/image_augmentation_1/vertical_flip": True,
"image_block_1/image_augmentation_1/contrast_factor": 0.0,
"image_block_1/image_augmentation_1/rotation_factor": 0.0,
"image_block_1/image_augmentation_1/translation_factor": 0.1,
"image_block_1/image_augmentation_1/zoom_factor": 0.0,
"image_block_1/res_net_block_1/pretrained": False,
"image_block_1/res_net_block_1/version": "resnet50",
"image_block_1/res_net_block_1/imagenet_size": True,
"classification_head_1/spatial_reduction_1/reduction_type": "global_avg", # noqa: E501
"classification_head_1/dropout": 0,
"optimizer": "adam",
"learning_rate": 1e-3,
},
# Entry 2: "efficient" block type (b7, pretrained and trainable) with
# augmentation and a smaller learning rate than the other two entries.
{
"image_block_1/block_type": "efficient",
"image_block_1/normalize": True,
"image_block_1/augment": True,
"image_block_1/image_augmentation_1/horizontal_flip": True,
"image_block_1/image_augmentation_1/vertical_flip": False,
"image_block_1/image_augmentation_1/contrast_factor": 0.0,
"image_block_1/image_augmentation_1/rotation_factor": 0.0,
"image_block_1/image_augmentation_1/translation_factor": 0.1,
"image_block_1/image_augmentation_1/zoom_factor": 0.0,
"image_block_1/efficient_net_block_1/pretrained": True,
"image_block_1/efficient_net_block_1/version": "b7",
"image_block_1/efficient_net_block_1/trainable": True,
"image_block_1/efficient_net_block_1/imagenet_size": True,
"classification_head_1/spatial_reduction_1/reduction_type": "global_avg", # noqa: E501
"classification_head_1/dropout": 0,
"optimizer": "adam",
"learning_rate": 2e-5,
},
]
# Initial hyperparameter value sets for text classification. The list is
# handed to `greedy.Greedy` as `initial_hps` by `TextClassifierTuner`.
TEXT_CLASSIFIER = [
# Single entry: embedding, a two-block conv stack, flatten reduction and a
# two-layer dense block, trained with "adam_weight_decay".
{
"text_block_1/max_tokens": 500,
"text_block_1/embedding_1/embedding_dim": 32,
"text_block_1/embedding_1/dropout": 0.25,
"text_block_1/conv_block_1/kernel_size": 3,
"text_block_1/conv_block_1/separable": False,
"text_block_1/conv_block_1/max_pooling": True,
"text_block_1/conv_block_1/num_blocks": 2,
"text_block_1/conv_block_1/num_layers": 2,
"text_block_1/conv_block_1/filters_0_0": 32,
"text_block_1/conv_block_1/filters_0_1": 32,
"text_block_1/conv_block_1/dropout": 0.0,
"text_block_1/conv_block_1/filters_1_0": 32,
"text_block_1/conv_block_1/filters_1_1": 32,
"text_block_1/spatial_reduction_1/reduction_type": "flatten",
"text_block_1/dense_block_1/use_batchnorm": False,
"text_block_1/dense_block_1/num_layers": 2,
"text_block_1/dense_block_1/units_0": 32,
"text_block_1/dense_block_1/dropout": 0.0,
"text_block_1/dense_block_1/units_1": 32,
"classification_head_1/dropout": 0,
"optimizer": "adam_weight_decay",
"learning_rate": 2e-5,
},
]
# Initial hyperparameter value sets for structured-data classification. The
# list is handed to `greedy.Greedy` as `initial_hps` by
# `StructuredDataClassifierTuner`.
STRUCTURED_DATA_CLASSIFIER = [
# Single entry: normalization plus a two-layer dense block of 32 units each.
{
"structured_data_block_1/normalize": True,
"structured_data_block_1/dense_block_1/num_layers": 2,
"structured_data_block_1/dense_block_1/use_batchnorm": False,
"structured_data_block_1/dense_block_1/dropout": 0,
"structured_data_block_1/dense_block_1/units_0": 32,
"structured_data_block_1/dense_block_1/units_1": 32,
"classification_head_1/dropout": 0.0,
"optimizer": "adam",
"learning_rate": 0.001,
}
]
# Initial hyperparameter value sets for structured-data regression. The list
# is handed to `greedy.Greedy` as `initial_hps` by
# `StructuredDataRegressorTuner`.
STRUCTURED_DATA_REGRESSOR = [
# Single entry: same block values as the structured-data classifier entry,
# but keyed on "regression_head_1" instead of "classification_head_1".
{
"structured_data_block_1/normalize": True,
"structured_data_block_1/dense_block_1/num_layers": 2,
"structured_data_block_1/dense_block_1/use_batchnorm": False,
"structured_data_block_1/dense_block_1/dropout": 0,
"structured_data_block_1/dense_block_1/units_0": 32,
"structured_data_block_1/dense_block_1/units_1": 32,
"regression_head_1/dropout": 0.0,
"optimizer": "adam",
"learning_rate": 0.001,
}
]
class ImageClassifierTuner(greedy.Greedy):
    """Greedy tuner seeded with the `IMAGE_CLASSIFIER` initial values."""

    def __init__(self, **kwargs):
        # `initial_hps` is fixed here; every other keyword is forwarded as is.
        super(ImageClassifierTuner, self).__init__(
            initial_hps=IMAGE_CLASSIFIER, **kwargs
        )
class TextClassifierTuner(greedy.Greedy):
    """Greedy tuner seeded with the `TEXT_CLASSIFIER` initial values."""

    def __init__(self, **kwargs):
        # `initial_hps` is fixed here; every other keyword is forwarded as is.
        super(TextClassifierTuner, self).__init__(
            initial_hps=TEXT_CLASSIFIER, **kwargs
        )
class StructuredDataClassifierTuner(greedy.Greedy):
    """Greedy tuner seeded with `STRUCTURED_DATA_CLASSIFIER` initial values."""

    def __init__(self, **kwargs):
        # `initial_hps` is fixed here; every other keyword is forwarded as is.
        super(StructuredDataClassifierTuner, self).__init__(
            initial_hps=STRUCTURED_DATA_CLASSIFIER, **kwargs
        )
class StructuredDataRegressorTuner(greedy.Greedy):
    """Greedy tuner seeded with `STRUCTURED_DATA_REGRESSOR` initial values."""

    def __init__(self, **kwargs):
        # `initial_hps` is fixed here; every other keyword is forwarded as is.
        super(StructuredDataRegressorTuner, self).__init__(
            initial_hps=STRUCTURED_DATA_REGRESSOR, **kwargs
        )
================================================
FILE: autokeras/tuners/task_specific_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import keras_tuner
import autokeras as ak
from autokeras.tuners import task_specific
def test_img_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """Building with IMAGE_CLASSIFIER[0] registers exactly its keys."""
    expected = task_specific.IMAGE_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    clf = ak.ImageClassifier(directory=tmp_path)
    clf.inputs[0].shape = (32, 32, 3)
    clf.outputs[0].in_blocks[0].shape = (10,)
    clf.tuner.hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
def test_img_clf_init_hp1_equals_hp_of_a_model(tmp_path):
    """Building with IMAGE_CLASSIFIER[1] registers exactly its keys."""
    expected = task_specific.IMAGE_CLASSIFIER[1]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    clf = ak.ImageClassifier(directory=tmp_path)
    clf.inputs[0].shape = (32, 32, 3)
    clf.outputs[0].in_blocks[0].shape = (10,)
    clf.tuner.hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
def test_img_clf_init_hp2_equals_hp_of_a_model(tmp_path):
    """Building with IMAGE_CLASSIFIER[2] registers exactly its keys."""
    expected = task_specific.IMAGE_CLASSIFIER[2]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    clf = ak.ImageClassifier(directory=tmp_path)
    clf.inputs[0].shape = (32, 32, 3)
    clf.outputs[0].in_blocks[0].shape = (10,)
    clf.tuner.hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
def test_txt_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """Building with TEXT_CLASSIFIER[0] registers exactly its keys."""
    expected = task_specific.TEXT_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    clf = ak.TextClassifier(directory=tmp_path)
    text_input = clf.inputs[0]
    text_input.shape = (1,)
    text_input.batch_size = 6
    text_input.num_samples = 1000
    clf.outputs[0].in_blocks[0].shape = (10,)

    hypermodel = clf.tuner.hypermodel
    hypermodel.epochs = 1000
    hypermodel.num_samples = 20000
    hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
def test_sd_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """Building with STRUCTURED_DATA_CLASSIFIER[0] registers its keys."""
    expected = task_specific.STRUCTURED_DATA_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    clf = ak.StructuredDataClassifier(
        directory=tmp_path,
        column_names=["a", "b"],
        column_types={"a": "numerical", "b": "numerical"},
    )
    clf.inputs[0].shape = (2,)
    clf.outputs[0].in_blocks[0].shape = (10,)
    clf.tuner.hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
def test_sd_reg_init_hp0_equals_hp_of_a_model(tmp_path):
    """Building with STRUCTURED_DATA_REGRESSOR[0] registers its keys."""
    expected = task_specific.STRUCTURED_DATA_REGRESSOR[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(expected)

    reg = ak.StructuredDataRegressor(
        directory=tmp_path,
        column_names=["a", "b"],
        column_types={"a": "numerical", "b": "numerical"},
    )
    reg.inputs[0].shape = (2,)
    reg.outputs[0].in_blocks[0].shape = (10,)
    reg.tuner.hypermodel.build(hp)

    assert set(hp._hps.keys()) == set(expected.keys())
================================================
FILE: autokeras/utils/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: autokeras/utils/data_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import numpy as np
import tree
from keras import ops
def split_dataset(dataset, validation_split):
    """Cut nested numpy arrays into a training part and a validation part.

    The instance count is read from the first axis of the first array in the
    flattened structure; every array is then sliced at the same position.

    # Arguments
        dataset: A nested structure of numpy arrays.
        validation_split: Float. The split ratio for the validation set.

    # Raises
        ValueError: If the dataset provided is too small to be split.

    # Returns
        A tuple of two nested structures.
        The training set and the validation set.
    """
    shapes = tree.map_structure(lambda arr: np.array(arr.shape), dataset)
    num_instances = tree.flatten(shapes)[0][0]

    if num_instances < 2:
        raise ValueError(
            "The dataset should at least contain 2 instances to be split."
        )

    # Clamp the validation size so each part keeps at least one instance.
    requested = int(num_instances * validation_split)
    validation_set_size = min(max(requested, 1), num_instances - 1)
    split_at = num_instances - validation_set_size

    def _head(arr):
        return arr[:split_at]

    def _tail(arr):
        return arr[split_at:]

    train_dataset = tree.map_structure(_head, dataset)
    validation_dataset = tree.map_structure(_tail, dataset)
    return train_dataset, validation_dataset
def cast_to_float32(tensor):
    """Return `tensor` as float32, casting only when its dtype differs."""
    dtype_name = keras.backend.standardize_dtype(tensor.dtype)
    if dtype_name != "float32":
        return ops.cast(tensor, "float32")  # pragma: no cover
    return tensor
def dataset_shape(dataset):
    """Replace every array in a nested structure with its shape.

    # Arguments
        dataset: A nested structure (dict, list, tuple) containing numpy
            arrays.

    # Returns
        The same nested structure with shapes instead of arrays. Each shape
        is returned as a numpy array.
    """

    def _shape_of(array):
        return np.array(array.shape)

    return tree.map_structure(_shape_of, dataset)
================================================
FILE: autokeras/utils/data_utils_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest
from autokeras.utils import data_utils
def test_split_data_has_one_batch_error():
    """A single-instance dataset cannot be split and raises ValueError."""
    single_instance = np.array([1])
    with pytest.raises(ValueError) as info:
        data_utils.split_dataset(single_instance, 0.2)

    message = str(info.value)
    assert "The dataset should at least contain 2" in message
================================================
FILE: autokeras/utils/io_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from keras_tuner.engine import hyperparameters
# Lowercase image file extensions, each with its leading dot.
# NOTE(review): presumably the formats accepted when loading image files;
# no use of this tuple is visible in this module - confirm against callers.
WHITELIST_FORMATS = (".bmp", ".gif", ".jpeg", ".jpg", ".png")
def save_json(path, obj):
    """Serialize `obj` as JSON into the file at `path`, overwriting it."""
    with open(path, "w") as json_file:
        json.dump(obj, json_file)
def load_json(path):
    """Read the file at `path` and return its decoded JSON content."""
    with open(path, "r") as json_file:
        content = json.load(json_file)
    return content
def deserialize_block_arg(arg):
    """Turn a dict argument back into a hyperparameter.

    A dict is passed to `hyperparameters.deserialize`; any other value is
    returned unchanged.
    """
    if not isinstance(arg, dict):
        return arg
    return hyperparameters.deserialize(arg)
def serialize_block_arg(arg):
    """Serialize a hyperparameter argument.

    A `hyperparameters.HyperParameter` instance is passed to
    `hyperparameters.serialize`; any other value is returned unchanged.
    """
    if not isinstance(arg, hyperparameters.HyperParameter):
        return arg
    return hyperparameters.serialize(arg)
================================================
FILE: autokeras/utils/layer_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras import layers
def get_global_average_pooling(shape):
    """Pick the global average pooling layer class for a tensor shape.

    The class is chosen by `len(shape) - 3`: lengths 3, 4 and 5 give the 1D,
    2D and 3D variants. Shorter shapes produce a negative index and wrap
    around to the end of the candidate sequence.
    """
    candidates = (
        layers.GlobalAveragePooling1D,
        layers.GlobalAveragePooling2D,
        layers.GlobalAveragePooling3D,
    )
    offset = len(shape) - 3
    return candidates[offset]
def get_global_max_pooling(shape):
    """Pick the global max pooling layer class for a tensor shape.

    The class is chosen by `len(shape) - 3`: lengths 3, 4 and 5 give the 1D,
    2D and 3D variants. Shorter shapes produce a negative index and wrap
    around to the end of the candidate sequence.
    """
    candidates = (
        layers.GlobalMaxPool1D,
        layers.GlobalMaxPool2D,
        layers.GlobalMaxPool3D,
    )
    offset = len(shape) - 3
    return candidates[offset]
def get_max_pooling(shape):
    """Pick the max pooling layer class for a tensor shape.

    The class is chosen by `len(shape) - 3`: lengths 3, 4 and 5 give the 1D,
    2D and 3D variants. Shorter shapes produce a negative index and wrap
    around to the end of the candidate sequence.
    """
    candidates = (
        layers.MaxPool1D,
        layers.MaxPool2D,
        layers.MaxPool3D,
    )
    offset = len(shape) - 3
    return candidates[offset]
def get_conv(shape):
    """Pick the convolution layer class for a tensor shape.

    The class is chosen by `len(shape) - 3`: lengths 3, 4 and 5 give
    `Conv1D`, `Conv2D` and `Conv3D`. Shorter shapes produce a negative index
    and wrap around to the end of the candidate sequence.
    """
    candidates = (layers.Conv1D, layers.Conv2D, layers.Conv3D)
    offset = len(shape) - 3
    return candidates[offset]
def get_sep_conv(shape):
    """Pick the separable convolution layer class for a tensor shape.

    The class is chosen by `len(shape) - 3`: lengths 3 and 4 give
    `SeparableConv1D` and `SeparableConv2D`. Length 5 gives the regular
    `Conv3D`, which is the third entry of the candidate sequence.
    """
    candidates = (  # pragma: no cover
        layers.SeparableConv1D,
        layers.SeparableConv2D,
        layers.Conv3D,
    )
    offset = len(shape) - 3  # pragma: no cover
    return candidates[offset]  # pragma: no cover
================================================
FILE: autokeras/utils/types.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable
from typing import Dict
from typing import List
from typing import Union
import numpy as np
from keras.losses import Loss
from keras.metrics import Metric
# Type of a dataset argument. A `Union` with a single member evaluates to
# that member, so this alias is equivalent to `np.ndarray`.
DatasetType = Union[np.ndarray]
# A loss given as a string, a callable, or a `keras.losses.Loss`.
LossType = Union[str, Callable, Loss]
# A single metric given as a string, a callable, or a `keras.metrics.Metric`.
AcceptableMetric = Union[str, Callable, Metric]
# A collection of metrics: a flat list, a list of lists, or a dict with
# string keys.
MetricsType = Union[
List[AcceptableMetric],
List[List[AcceptableMetric]],
Dict[str, AcceptableMetric],
]
================================================
FILE: autokeras/utils/utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import keras
import keras_tuner
import tree
# Collect OOM exceptions from available libraries
oom_exceptions = []
try:
import tensorflow as tf
oom_exceptions.append(tf.errors.ResourceExhaustedError) # pragma: no cover
except ImportError: # pragma: no cover
pass # pragma: no cover
try:
import torch
oom_exceptions.append(torch.cuda.OutOfMemoryError) # pragma: no cover
except ImportError: # pragma: no cover
pass # pragma: no cover
try:
import jax
oom_exceptions.append(jax.errors.ResourceExhaustedError) # pragma: no cover
except (ImportError, AttributeError): # pragma: no cover
pass # pragma: no cover
oom_exceptions = tuple(oom_exceptions)
def validate_num_inputs(inputs, num):
    """Check that the flattened `inputs` hold exactly `num` elements.

    # Arguments
        inputs: A nested structure; it is flattened with `tree.flatten`.
        num: Int. The expected number of flattened elements.

    # Raises
        ValueError: If the number of flattened elements differs from `num`.
    """
    flat_inputs = tree.flatten(inputs)
    received = len(flat_inputs)
    if received != num:
        raise ValueError(
            "Expected {num} elements in the inputs list "
            "but received {len} inputs.".format(num=num, len=received)
        )
def to_snake_case(name):
    """Convert a CamelCase `name` to lowercase snake_case.

    Two passes insert underscores: first before each capital that starts a
    capitalized word, then between a lowercase letter and a following
    capital. The result is lowercased.
    """
    word_split = re.sub("(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
    case_split = re.sub("([a-z])([A-Z])", r"\1_\2", word_split)
    return case_split.lower()
def contain_instance(instance_list, instance_type):
    """Tell whether any element of `instance_list` is an `instance_type`.

    Every element is checked before the result is computed.
    """
    checks = list(
        map(lambda item: isinstance(item, instance_type), instance_list)
    )
    return any(checks)
def evaluate_with_adaptive_batch_size(
    model, batch_size, verbose=1, **fit_kwargs
):
    """Run `model.evaluate` through `run_with_adaptive_batch_size`.

    `validation_data` is accepted by the inner callable but not forwarded to
    `model.evaluate`; `x`, `verbose` and the remaining keywords are.
    """

    def _evaluate(x, validation_data, **kwargs):
        return model.evaluate(x, verbose=verbose, **kwargs)

    return run_with_adaptive_batch_size(batch_size, _evaluate, **fit_kwargs)
def predict_with_adaptive_batch_size(
    model, batch_size, verbose=1, **fit_kwargs
):
    """Run `model.predict` through `run_with_adaptive_batch_size`.

    `validation_data` is accepted by the inner callable but not forwarded to
    `model.predict`; `x`, `verbose` and the remaining keywords are.
    """

    def _predict(x, validation_data, **kwargs):
        return model.predict(x, verbose=verbose, **kwargs)

    return run_with_adaptive_batch_size(batch_size, _predict, **fit_kwargs)
def fit_with_adaptive_batch_size(model, batch_size, **fit_kwargs):
    """Run `model.fit` through `run_with_adaptive_batch_size`.

    # Returns
        A tuple `(model, history)`, where `history` is whatever the adaptive
        runner returned from `model.fit`.
    """

    def _fit(**kwargs):
        return model.fit(**kwargs)

    history = run_with_adaptive_batch_size(batch_size, _fit, **fit_kwargs)
    return model, history
def run_with_adaptive_batch_size(batch_size, func, **fit_kwargs):
    """Call `func`, halving the batch size whenever it runs out of memory.

    `func` is called with `validation_data`, `batch_size` and the remaining
    keywords. When it raises one of `oom_exceptions`, the batch size is
    halved and the call is retried; at batch size 1 the exception is
    re-raised instead.

    # Arguments
        batch_size: Int. The batch size to try first.
        func: Callable accepting `validation_data`, `batch_size` and the
            remaining `fit_kwargs` as keyword arguments.
        **fit_kwargs: Keywords forwarded to `func`. `validation_data` is
            taken out and passed explicitly (None when absent).

    # Returns
        The return value of the first call to `func` that succeeds.
    """
    validation_data = fit_kwargs.pop("validation_data", None)
    while batch_size > 0:
        try:
            history = func(
                validation_data=validation_data,
                batch_size=batch_size,
                **fit_kwargs,
            )
        except oom_exceptions:  # pragma: no cover
            if batch_size == 1:  # pragma: no cover
                # Nothing left to halve: surface the error.
                raise  # pragma: no cover
            batch_size = batch_size // 2  # pragma: no cover
            print(  # pragma: no cover
                "Not enough memory, reduce batch size to {batch_size}.".format(
                    batch_size=batch_size
                )
            )
        else:
            break
    return history
def get_hyperparameter(value, hp, dtype):
    """Return `value`, falling back to `hp` when `value` is None.

    `dtype` is part of the signature but is not used.
    """
    return hp if value is None else value
def add_to_hp(hp, hps, name=None):
    """Register the HyperParameter `hp` on the container `hps`.

    The method of `hps` named after the class of `hp` is called with the
    config of `hp`, minus its "conditions" and "name" entries.

    # Arguments
        hp: A keras_tuner HyperParameter. Any other value is returned
            unchanged.
        hps: keras_tuner.HyperParameters.
        name: String. If left unspecified, the hp name is used.

    # Returns
        The result of the `hps` method call, or `hp` itself when it is not
        a HyperParameter.
    """
    hp_base = keras_tuner.engine.hyperparameters.HyperParameter
    if not isinstance(hp, hp_base):
        return hp

    config = hp.get_config()
    target_name = hp.name if name is None else name
    # Both are dropped from the config; the name is passed separately.
    config.pop("conditions")
    config.pop("name")

    register = getattr(hps, hp.__class__.__name__)
    return register(name=target_name, **config)
def serialize_keras_object(obj):
    """Serialize `obj` with `keras.utils.serialize_keras_object`."""
    serialized = keras.utils.serialize_keras_object(obj)  # pragma: no cover
    return serialized  # pragma: no cover
def deserialize_keras_object(config, module_objects=None, custom_objects=None):
    """Deserialize `config` with `keras.utils.deserialize_keras_object`.

    Both lookup tables are passed by keyword. The Keras function takes
    `(config, custom_objects=None, safe_mode=True, **kwargs)`, so passing
    `module_objects` as the third positional argument would bind it to
    `safe_mode` instead of using it as a lookup table.

    # Arguments
        config: The serialized object to deserialize.
        module_objects: Optional lookup table, forwarded as the
            `module_objects` keyword.
        custom_objects: Optional lookup table, forwarded as the
            `custom_objects` keyword.

    # Returns
        The object returned by `keras.utils.deserialize_keras_object`.
    """
    return keras.utils.deserialize_keras_object(
        config,
        custom_objects=custom_objects,
        module_objects=module_objects,
    )
================================================
FILE: autokeras/utils/utils_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from keras_tuner.engine import hyperparameters
from autokeras.utils import utils
def test_validate_num_inputs_error():
    """Three inputs where two are expected raise a ValueError."""
    three_inputs = [1, 2, 3]
    with pytest.raises(ValueError) as info:
        utils.validate_num_inputs(three_inputs, 2)

    message = str(info.value)
    assert "Expected 2 elements in the inputs list" in message
def test_get_hyperparameter_with_none_return_hp():
    """A None value makes `get_hyperparameter` return the fallback hp."""
    fallback = hyperparameters.Choice("hp", [10, 20])

    result = utils.get_hyperparameter(None, fallback, int)

    assert isinstance(result, hyperparameters.Choice)
def test_get_hyperparameter_with_int_return_int():
    """An explicit int value is returned instead of the fallback hp."""
    fallback = hyperparameters.Choice("hp", [10, 20])

    result = utils.get_hyperparameter(10, fallback, int)

    assert isinstance(result, int)
    assert result == 10
def test_get_hyperparameter_with_hp_return_same():
    """A hyperparameter passed as the value yields a `Choice` result."""
    given = hyperparameters.Choice("hp", [10, 30])
    fallback = hyperparameters.Choice("hp", [10, 20])

    result = utils.get_hyperparameter(given, fallback, int)

    assert isinstance(result, hyperparameters.Choice)
================================================
FILE: benchmark/README.md
================================================
Usage: python benchmark/run.py structured_data_classification report.csv
================================================
FILE: benchmark/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: benchmark/datasets/titanic_test.csv
================================================
sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived
female,15,0,0,7.225,3,?,C,True,1
female,1,0,2,15.7417,3,?,C,False,1
male,20,1,1,15.7417,3,?,C,False,1
female,19,1,1,15.7417,3,?,C,False,1
male,33,0,0,8.05,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,12,1,0,11.2417,3,?,C,False,1
female,14,1,0,11.2417,3,?,C,False,1
female,29,0,0,7.925,3,?,S,True,0
male,28,0,0,8.05,3,?,S,True,0
female,18,0,0,7.775,3,?,S,True,1
female,26,0,0,7.8542,3,?,S,True,1
male,21,0,0,7.8542,3,?,S,True,0
male,41,0,0,7.125,3,?,S,True,0
male,39,0,0,7.925,3,?,S,True,1
male,21,0,0,7.8,3,?,S,True,0
male,28.5,0,0,7.2292,3,?,C,True,0
female,22,0,0,7.75,3,?,S,True,1
male,61,0,0,6.2375,3,?,S,True,0
male,,1,0,15.5,3,?,Q,False,0
male,,0,0,7.8292,3,?,Q,True,0
female,,1,0,15.5,3,?,Q,False,1
male,,0,0,7.7333,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,23,0,0,9.225,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
female,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.8792,3,?,Q,True,1
female,22,0,0,7.775,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.8292,3,?,Q,True,1
male,9,0,1,3.1708,3,?,S,False,1
male,28,0,0,22.525,3,?,S,True,0
male,42,0,1,8.4042,3,?,S,False,0
male,,0,0,7.3125,3,?,S,True,0
female,31,0,0,7.8542,3,?,S,True,0
male,28,0,0,7.8542,3,?,S,True,0
male,32,0,0,7.775,3,?,S,True,1
male,20,0,0,9.225,3,?,S,True,0
female,23,0,0,8.6625,3,?,S,True,0
female,20,0,0,8.6625,3,?,S,True,0
male,20,0,0,8.6625,3,?,S,True,0
male,16,0,0,9.2167,3,?,S,True,0
female,31,0,0,8.6833,3,?,S,True,1
female,,0,0,7.6292,3,?,Q,True,0
male,2,3,1,21.075,3,?,S,False,0
male,6,3,1,21.075,3,?,S,False,0
female,3,3,1,21.075,3,?,S,False,0
female,8,3,1,21.075,3,?,S,False,0
female,29,0,4,21.075,3,?,S,False,0
male,1,4,1,39.6875,3,?,S,False,0
male,7,4,1,39.6875,3,?,S,False,0
male,2,4,1,39.6875,3,?,S,False,0
male,16,4,1,39.6875,3,?,S,False,0
male,14,4,1,39.6875,3,?,S,False,0
female,41,0,5,39.6875,3,?,S,False,0
male,21,0,0,8.6625,3,?,S,True,0
male,19,0,0,14.5,3,?,S,True,0
male,,0,0,8.7125,3,?,C,True,0
male,32,0,0,7.8958,3,?,S,True,0
male,0.75,1,1,13.775,3,?,S,False,0
female,3,1,1,13.775,3,?,S,False,0
female,26,0,2,13.775,3,?,S,False,0
male,,0,0,7,3,?,S,True,0
male,,0,0,7.775,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,21,0,0,7.925,3,?,S,True,0
male,25,0,0,7.925,3,?,S,True,0
male,22,0,0,7.25,3,?,S,True,0
male,25,1,0,7.775,3,?,S,False,1
male,,1,1,22.3583,3,?,C,False,1
female,,1,1,22.3583,3,F E69,C,False,1
female,,0,2,22.3583,3,?,C,False,1
female,,0,0,8.1375,3,?,Q,True,0
male,24,0,0,8.05,3,?,S,True,0
female,28,0,0,7.8958,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,25,1,0,7.775,3,?,S,False,0
female,18,0,0,7.775,3,?,S,True,0
male,32,0,0,8.05,3,E10,S,True,1
male,,0,0,7.8958,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
male,24,0,0,8.6625,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
female,,0,0,8.1125,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,,0,0,7.25,3,?,S,True,0
male,38,0,0,7.8958,3,?,S,True,0
male,21,0,0,8.05,3,?,S,True,0
male,10,4,1,29.125,3,?,Q,False,0
male,4,4,1,29.125,3,?,Q,False,0
male,7,4,1,29.125,3,?,Q,False,0
male,2,4,1,29.125,3,?,Q,False,0
male,8,4,1,29.125,3,?,Q,False,0
female,39,0,5,29.125,3,?,Q,False,0
female,22,0,0,39.6875,3,?,S,True,0
male,35,0,0,7.125,3,?,S,True,0
female,,0,0,7.7208,3,?,Q,True,1
male,,0,0,14.5,3,?,S,True,0
female,,0,0,14.5,3,?,S,True,0
male,50,1,0,14.5,3,?,S,False,0
female,47,1,0,14.5,3,?,S,False,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,7.775,3,?,S,True,0
female,2,1,1,20.2125,3,?,S,False,0
male,18,1,1,20.2125,3,?,S,False,0
female,41,0,2,20.2125,3,?,S,False,0
female,,0,0,8.05,3,?,S,True,1
male,50,0,0,8.05,3,?,S,True,0
male,16,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,1
male,,0,0,24.15,3,?,Q,True,0
male,,0,0,7.2292,3,?,C,True,0
male,25,0,0,7.225,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
male,,0,0,7.7292,3,?,Q,True,0
male,,0,0,7.575,3,?,S,True,0
male,38.5,0,0,7.25,3,?,S,True,0
male,,8,2,69.55,3,?,S,False,0
male,14.5,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,1,9,69.55,3,?,S,False,0
female,,1,9,69.55,3,?,S,False,0
male,24,0,0,9.325,3,?,S,True,0
female,21,0,0,7.65,3,?,S,True,1
male,39,0,0,7.925,3,?,S,True,0
male,,2,0,21.6792,3,?,C,False,0
male,,2,0,21.6792,3,?,C,False,0
male,,2,0,21.6792,3,?,C,False,0
female,1,1,1,16.7,3,G6,S,False,1
female,24,0,2,16.7,3,G6,S,False,1
female,4,1,1,16.7,3,G6,S,False,1
male,25,0,0,9.5,3,?,S,True,1
male,20,0,0,8.05,3,?,S,True,0
male,24.5,0,0,8.05,3,?,S,True,0
male,,0,0,7.725,3,?,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,29,0,0,9.5,3,?,S,True,1
male,,0,0,15.1,3,?,S,True,0
female,,0,0,7.7792,3,?,Q,True,1
male,,0,0,8.05,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,22,0,0,7.2292,3,?,C,True,0
male,,0,0,8.05,3,?,S,True,0
male,40,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.925,3,?,S,True,0
female,18,0,0,7.4958,3,?,S,True,1
male,4,3,2,27.9,3,?,S,False,0
male,10,3,2,27.9,3,?,S,False,0
female,9,3,2,27.9,3,?,S,False,0
female,2,3,2,27.9,3,?,S,False,0
male,40,1,4,27.9,3,?,S,False,0
female,45,1,4,27.9,3,?,S,False,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,8.6625,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
female,,0,0,7.7333,3,?,Q,True,1
male,19,0,0,7.65,3,F G73,S,True,0
male,30,0,0,8.05,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,32,0,0,8.05,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,33,0,0,8.6625,3,?,C,True,0
female,23,0,0,7.55,3,?,S,True,1
male,21,0,0,8.05,3,?,S,True,0
male,60.5,0,0,?,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
female,22,0,0,9.8375,3,?,S,True,0
male,31,0,0,7.925,3,?,S,True,1
male,27,0,0,8.6625,3,?,S,True,0
female,2,0,1,10.4625,3,G6,S,False,0
female,29,1,1,10.4625,3,G6,S,False,0
male,16,0,0,8.05,3,?,S,True,1
male,44,0,0,7.925,3,?,S,True,1
male,25,0,0,7.05,3,?,S,True,0
male,74,0,0,7.775,3,?,S,True,0
male,14,0,0,9.225,3,?,S,True,1
male,24,0,0,7.7958,3,?,S,True,0
male,25,0,0,7.7958,3,?,S,True,1
male,34,0,0,8.05,3,?,S,True,0
male,0.4167,0,1,8.5167,3,?,C,False,1
male,,1,0,6.4375,3,?,C,False,0
male,,0,0,6.4375,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
female,16,1,1,8.5167,3,?,C,False,1
male,,0,0,8.05,3,?,S,True,0
male,,1,0,16.1,3,?,S,False,0
female,,1,0,16.1,3,?,S,False,1
male,32,0,0,7.925,3,?,S,True,0
male,,0,0,7.75,3,F38,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,30.5,0,0,8.05,3,?,S,True,0
male,44,0,0,8.05,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,25,0,0,0,3,?,S,True,1
male,,0,0,7.2292,3,?,C,True,0
male,7,1,1,15.2458,3,?,C,False,1
female,9,1,1,15.2458,3,?,C,False,1
female,29,0,2,15.2458,3,?,C,False,1
male,36,0,0,7.8958,3,?,S,True,0
female,18,0,0,9.8417,3,?,S,True,1
female,63,0,0,9.5875,3,?,S,True,1
male,,1,1,14.5,3,?,S,False,0
male,11.5,1,1,14.5,3,?,S,False,0
male,40.5,0,2,14.5,3,?,S,False,0
female,10,0,2,24.15,3,?,S,False,0
male,36,1,1,24.15,3,?,S,False,0
female,30,1,1,24.15,3,?,S,False,0
male,,0,0,9.5,3,?,S,True,0
male,33,0,0,9.5,3,?,S,True,0
male,28,0,0,9.5,3,?,S,True,0
male,28,0,0,9.5,3,?,S,True,0
male,47,0,0,9,3,?,S,True,0
female,18,2,0,18,3,?,S,False,0
male,31,3,0,18,3,?,S,False,0
male,16,2,0,18,3,?,S,False,0
female,31,1,0,18,3,?,S,False,0
male,22,0,0,7.225,3,?,C,True,1
male,20,0,0,7.8542,3,?,S,True,0
female,14,0,0,7.8542,3,?,S,True,0
male,22,0,0,7.8958,3,?,S,True,0
male,22,0,0,9,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,32.5,0,0,9.5,3,?,S,True,0
female,38,0,0,7.2292,3,?,C,True,1
male,51,0,0,7.75,3,?,S,True,0
male,18,1,0,6.4958,3,?,S,False,0
male,21,1,0,6.4958,3,?,S,False,0
female,47,1,0,7,3,?,S,False,1
male,,0,0,8.7125,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,28.5,0,0,16.1,3,?,S,True,0
male,21,0,0,7.25,3,?,S,True,0
male,27,0,0,8.6625,3,?,S,True,0
male,,0,0,7.25,3,?,S,True,0
male,36,0,0,9.5,3,?,S,True,0
male,27,1,0,14.4542,3,?,C,False,0
female,15,1,0,14.4542,3,?,C,False,1
male,45.5,0,0,7.225,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
male,,0,0,14.4583,3,?,C,True,0
female,14.5,1,0,14.4542,3,?,C,False,0
female,,1,0,14.4542,3,?,C,False,0
male,26.5,0,0,7.225,3,?,C,True,0
male,27,0,0,7.225,3,?,C,True,0
male,29,0,0,7.875,3,?,S,True,0
================================================
FILE: benchmark/datasets/titanic_train.csv
================================================
sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived
female,29,0,0,211.3375,1,B5,S,True,1
male,0.9167,1,2,151.55,1,C22 C26,S,False,1
female,2,1,2,151.55,1,C22 C26,S,False,0
male,30,1,2,151.55,1,C22 C26,S,False,0
female,25,1,2,151.55,1,C22 C26,S,False,0
male,48,0,0,26.55,1,E12,S,True,1
female,63,1,0,77.9583,1,D7,S,False,1
male,39,0,0,0.0,1,A36,S,True,0
female,53,2,0,51.4792,1,C101,S,False,1
male,71,0,0,49.5042,1,?,C,True,0
male,47,1,0,227.525,1,C62 C64,C,False,0
female,18,1,0,227.525,1,C62 C64,C,False,1
female,24,0,0,69.3,1,B35,C,True,1
female,26,0,0,78.85,1,?,S,True,1
male,80,0,0,30.0,1,A23,S,True,1
male,,0,0,25.925,1,?,S,True,0
male,24,0,1,247.5208,1,B58 B60,C,False,0
female,50,0,1,247.5208,1,B58 B60,C,False,1
female,32,0,0,76.2917,1,D15,C,True,1
male,36,0,0,75.2417,1,C6,C,True,0
male,37,1,1,52.5542,1,D35,S,False,1
female,47,1,1,52.5542,1,D35,S,False,1
male,26,0,0,30.0,1,C148,C,True,1
female,42,0,0,227.525,1,?,C,True,1
female,29,0,0,221.7792,1,C97,S,True,1
male,25,0,0,26.0,1,?,C,True,0
male,25,1,0,91.0792,1,B49,C,False,1
female,19,1,0,91.0792,1,B49,C,False,1
female,35,0,0,135.6333,1,C99,S,True,1
male,28,0,0,26.55,1,C52,S,True,1
male,45,0,0,35.5,1,T,S,True,0
male,40,0,0,31.0,1,A31,C,True,1
female,30,0,0,164.8667,1,C7,S,True,1
female,58,0,0,26.55,1,C103,S,True,1
male,42,0,0,26.55,1,D22,S,True,0
female,45,0,0,262.375,1,?,C,True,1
female,22,0,1,55.0,1,E33,S,False,1
male,,0,0,26.55,1,?,S,True,1
male,41,0,0,30.5,1,A21,S,True,0
male,48,0,0,50.4958,1,B10,C,True,0
male,,0,0,39.6,1,?,C,True,0
female,44,0,0,27.7208,1,B4,C,True,1
female,59,2,0,51.4792,1,C101,S,False,1
female,60,0,0,76.2917,1,D15,C,True,1
female,41,0,0,134.5,1,E40,C,True,1
male,45,0,0,26.55,1,B38,S,True,0
male,,0,0,31.0,1,?,S,True,0
male,42,0,0,26.2875,1,E24,S,True,1
female,53,0,0,27.4458,1,?,C,True,1
male,36,0,1,512.3292,1,B51 B53 B55,C,False,1
female,58,0,1,512.3292,1,B51 B53 B55,C,False,1
male,33,0,0,5.0,1,B51 B53 B55,S,True,0
male,28,0,0,47.1,1,?,S,True,0
male,17,0,0,47.1,1,?,S,True,0
male,11,1,2,120.0,1,B96 B98,S,False,1
female,14,1,2,120.0,1,B96 B98,S,False,1
male,36,1,2,120.0,1,B96 B98,S,False,1
female,36,1,2,120.0,1,B96 B98,S,False,1
male,49,0,0,26.0,1,?,S,True,0
female,,0,0,27.7208,1,?,C,True,1
male,36,1,0,78.85,1,C46,S,False,0
female,76,1,0,78.85,1,C46,S,False,1
male,46,1,0,61.175,1,E31,S,False,0
female,47,1,0,61.175,1,E31,S,False,1
male,27,1,0,53.1,1,E8,S,False,1
female,33,1,0,53.1,1,E8,S,False,1
female,36,0,0,262.375,1,B61,C,True,1
female,30,0,0,86.5,1,B77,S,True,1
male,45,0,0,29.7,1,A9,C,True,1
female,,0,1,55.0,1,E33,S,False,1
male,,0,0,0.0,1,?,S,True,0
male,27,1,0,136.7792,1,C89,C,False,0
female,26,1,0,136.7792,1,C89,C,False,1
female,22,0,0,151.55,1,?,S,True,1
male,,0,0,52.0,1,A14,S,True,0
male,47,0,0,25.5875,1,E58,S,True,0
female,39,1,1,83.1583,1,E49,C,False,1
male,37,1,1,83.1583,1,E52,C,False,0
female,64,0,2,83.1583,1,E45,C,False,1
female,55,2,0,25.7,1,C101,S,False,1
male,,0,0,26.55,1,?,S,True,0
male,70,1,1,71.0,1,B22,S,False,0
female,36,0,2,71.0,1,B22,S,False,1
female,64,1,1,26.55,1,B26,S,False,1
male,39,1,0,71.2833,1,C85,C,False,0
female,38,1,0,71.2833,1,C85,C,False,1
male,51,0,0,26.55,1,E17,S,True,1
male,27,0,0,30.5,1,?,S,True,1
female,33,0,0,151.55,1,?,S,True,1
male,31,1,0,52.0,1,B71,S,False,0
female,27,1,2,52.0,1,B71,S,False,1
male,31,1,0,57.0,1,B20,S,False,1
female,17,1,0,57.0,1,B20,S,False,1
male,53,1,1,81.8583,1,A34,S,False,1
male,4,0,2,81.8583,1,A34,S,False,1
female,54,1,1,81.8583,1,A34,S,False,1
male,50,1,0,106.425,1,C86,C,False,0
female,27,1,1,247.5208,1,B58 B60,C,False,1
female,48,1,0,106.425,1,C86,C,False,1
female,48,1,0,39.6,1,A16,C,False,1
male,49,1,0,56.9292,1,A20,C,False,1
male,39,0,0,29.7,1,A18,C,True,0
female,23,0,1,83.1583,1,C54,C,False,1
female,38,0,0,227.525,1,C45,C,True,1
female,54,1,0,78.2667,1,D20,C,False,1
female,36,0,0,31.6792,1,A29,C,True,0
male,,0,0,221.7792,1,C95,S,True,0
female,,0,0,31.6833,1,?,S,True,1
female,,0,0,110.8833,1,?,C,True,1
male,36,0,0,26.3875,1,E25,S,True,1
male,30,0,0,27.75,1,C111,C,True,0
female,24,3,2,263.0,1,C23 C25 C27,S,False,1
female,28,3,2,263.0,1,C23 C25 C27,S,False,1
female,23,3,2,263.0,1,C23 C25 C27,S,False,1
male,19,3,2,263.0,1,C23 C25 C27,S,False,0
male,64,1,4,263.0,1,C23 C25 C27,S,False,0
female,60,1,4,263.0,1,C23 C25 C27,S,False,1
female,30,0,0,56.9292,1,E36,C,True,1
male,,0,0,26.55,1,D34,S,True,0
male,50,2,0,133.65,1,?,S,False,1
male,43,1,0,27.7208,1,D40,C,False,1
female,,1,0,133.65,1,?,S,False,1
female,22,0,2,49.5,1,B39,C,False,1
male,60,1,1,79.2,1,B41,C,False,1
female,48,1,1,79.2,1,B41,C,False,1
male,,0,0,0.0,1,B102,S,True,0
male,37,1,0,53.1,1,C123,S,False,0
female,35,1,0,53.1,1,C123,S,False,1
male,47,0,0,38.5,1,E63,S,True,0
female,35,0,0,211.5,1,C130,C,True,1
female,22,0,1,59.4,1,?,C,False,1
female,45,0,1,59.4,1,?,C,False,1
male,24,0,0,79.2,1,B86,C,True,0
male,49,1,0,89.1042,1,C92,C,False,1
female,,1,0,89.1042,1,C92,C,False,1
male,71,0,0,34.6542,1,A5,C,True,0
male,53,0,0,28.5,1,C51,C,True,1
female,19,0,0,30.0,1,B42,S,True,1
male,38,0,1,153.4625,1,C91,S,False,0
female,58,0,1,153.4625,1,C125,S,False,1
male,23,0,1,63.3583,1,D10 D12,C,False,1
female,45,0,1,63.3583,1,D10 D12,C,False,1
male,46,0,0,79.2,1,B82 B84,C,True,0
male,25,1,0,55.4417,1,E50,C,False,1
female,25,1,0,55.4417,1,E50,C,False,1
male,48,1,0,76.7292,1,D33,C,False,1
female,49,1,0,76.7292,1,D33,C,False,1
male,,0,0,42.4,1,?,S,True,0
male,45,1,0,83.475,1,C83,S,False,0
female,35,1,0,83.475,1,C83,S,False,1
male,40,0,0,0.0,1,B94,S,True,0
male,27,0,0,76.7292,1,D49,C,True,1
male,,0,0,30.0,1,D45,S,True,1
female,24,0,0,83.1583,1,C54,C,True,1
male,55,1,1,93.5,1,B69,S,False,0
female,52,1,1,93.5,1,B69,S,False,1
male,42,0,0,42.5,1,B11,S,True,0
male,,0,0,51.8625,1,E46,S,True,0
male,55,0,0,50.0,1,C39,S,True,0
female,16,0,1,57.9792,1,B18,C,False,1
female,44,0,1,57.9792,1,B18,C,False,1
female,51,1,0,77.9583,1,D11,S,False,1
male,42,1,0,52.0,1,?,S,False,0
female,35,1,0,52.0,1,?,S,False,1
male,35,0,0,26.55,1,?,C,True,1
male,38,1,0,90.0,1,C93,S,False,1
male,,0,0,30.6958,1,?,C,True,0
female,35,1,0,90.0,1,C93,S,False,1
female,38,0,0,80.0,1,B28,?,True,1
female,50,0,0,28.7125,1,C49,C,True,0
male,49,0,0,0.0,1,B52 B54 B56,S,True,1
male,46,0,0,26.0,1,?,S,True,0
male,50,0,0,26.0,1,E60,S,True,0
male,32.5,0,0,211.5,1,C132,C,True,0
male,58,0,0,29.7,1,B37,C,True,0
male,41,1,0,51.8625,1,D21,S,False,0
female,,1,0,51.8625,1,D21,S,False,1
male,42,1,0,52.5542,1,D19,S,False,1
female,45,1,0,52.5542,1,D19,S,False,1
male,,0,0,26.55,1,C124,S,True,0
female,39,0,0,211.3375,1,?,S,True,1
female,49,0,0,25.9292,1,D17,S,True,1
female,30,0,0,106.425,1,?,C,True,1
male,35,0,0,512.3292,1,B101,C,True,1
male,,0,0,27.7208,1,?,C,True,0
male,42,0,0,26.55,1,?,S,True,0
female,55,0,0,27.7208,1,?,C,True,1
female,16,0,1,39.4,1,D28,S,False,1
female,51,0,1,39.4,1,D28,S,False,1
male,29,0,0,30.0,1,D6,S,True,0
female,21,0,0,77.9583,1,D9,S,True,1
male,30,0,0,45.5,1,?,S,True,0
female,58,0,0,146.5208,1,B80,C,True,1
female,15,0,1,211.3375,1,B5,S,False,1
male,30,0,0,26.0,1,C106,S,True,0
female,16,0,0,86.5,1,B79,S,True,1
male,,0,0,29.7,1,C47,C,True,1
male,19,1,0,53.1,1,D30,S,False,0
female,18,1,0,53.1,1,D30,S,False,1
female,24,0,0,49.5042,1,C90,C,True,1
male,46,0,0,75.2417,1,C6,C,True,0
male,54,0,0,51.8625,1,E46,S,True,0
male,36,0,0,26.2875,1,E25,S,True,1
male,28,1,0,82.1708,1,?,C,False,0
female,,1,0,82.1708,1,?,C,False,1
male,65,0,0,26.55,1,E38,S,True,0
male,44,2,0,90.0,1,C78,Q,False,0
female,33,1,0,90.0,1,C78,Q,False,1
female,37,1,0,90.0,1,C78,Q,False,1
male,30,1,0,57.75,1,C78,C,False,1
male,55,0,0,30.5,1,C30,S,True,0
male,47,0,0,42.4,1,?,S,True,0
male,37,0,1,29.7,1,C118,C,False,0
female,31,1,0,113.275,1,D36,C,False,1
female,23,1,0,113.275,1,D36,C,False,1
male,58,0,2,113.275,1,D48,C,False,0
female,19,0,2,26.2833,1,D47,S,False,1
male,64,0,0,26.0,1,?,S,True,0
female,39,0,0,108.9,1,C105,C,True,1
male,,0,0,25.7417,1,?,C,True,1
female,22,0,1,61.9792,1,B36,C,False,1
male,65,0,1,61.9792,1,B30,C,False,0
male,28.5,0,0,27.7208,1,D43,C,True,0
male,,0,0,0.0,1,?,S,True,0
male,45.5,0,0,28.5,1,C124,S,True,0
male,23,0,0,93.5,1,B24,S,True,0
male,29,1,0,66.6,1,C2,S,False,0
female,22,1,0,66.6,1,C2,S,False,1
male,18,1,0,108.9,1,C65,C,False,0
female,17,1,0,108.9,1,C65,C,False,1
female,30,0,0,93.5,1,B73,S,True,1
male,52,0,0,30.5,1,C104,S,True,1
male,47,0,0,52.0,1,C110,S,True,0
female,56,0,1,83.1583,1,C50,C,False,1
male,38,0,0,0.0,1,?,S,True,0
male,,0,0,39.6,1,?,S,True,1
male,22,0,0,135.6333,1,?,C,True,0
male,,0,0,227.525,1,?,C,True,0
female,43,0,1,211.3375,1,B3,S,False,1
male,31,0,0,50.4958,1,A24,S,True,0
male,45,0,0,26.55,1,?,S,True,1
male,,0,0,50.0,1,A32,S,True,0
female,33,0,0,27.7208,1,A11,C,True,1
male,46,0,0,79.2,1,?,C,True,0
male,36,0,0,40.125,1,A10,C,True,0
female,33,0,0,86.5,1,B77,S,True,1
male,55,1,0,59.4,1,?,C,False,0
female,54,1,0,59.4,1,?,C,False,1
male,33,0,0,26.55,1,?,S,True,0
male,13,2,2,262.375,1,B57 B59 B63 B66,C,False,1
female,18,2,2,262.375,1,B57 B59 B63 B66,C,False,1
female,21,2,2,262.375,1,B57 B59 B63 B66,C,False,1
male,61,1,3,262.375,1,B57 B59 B63 B66,C,False,0
female,48,1,3,262.375,1,B57 B59 B63 B66,C,False,1
male,,0,0,30.5,1,C106,S,True,1
female,24,0,0,69.3,1,B35,C,True,1
male,,0,0,26.0,1,?,S,True,1
female,35,1,0,57.75,1,C28,C,False,1
female,30,0,0,31.0,1,?,C,True,1
male,34,0,0,26.55,1,?,S,True,1
female,40,0,0,153.4625,1,C125,S,True,1
male,35,0,0,26.2875,1,E24,S,True,1
male,50,1,0,55.9,1,E44,S,False,0
female,39,1,0,55.9,1,E44,S,False,1
male,56,0,0,35.5,1,A26,C,True,1
male,28,0,0,35.5,1,A6,S,True,1
male,56,0,0,26.55,1,?,S,True,0
male,56,0,0,30.6958,1,A7,C,True,0
male,24,1,0,60.0,1,C31,S,False,0
male,,0,0,26.0,1,A19,S,True,0
female,18,1,0,60.0,1,C31,S,False,1
male,24,1,0,82.2667,1,B45,S,False,1
female,23,1,0,82.2667,1,B45,S,False,1
male,6,0,2,134.5,1,E34,C,False,1
male,45,1,1,134.5,1,E34,C,False,1
female,40,1,1,134.5,1,E34,C,False,1
male,57,1,0,146.5208,1,B78,C,False,0
female,,1,0,146.5208,1,B78,C,False,1
male,32,0,0,30.5,1,B50,C,True,1
male,62,0,0,26.55,1,C87,S,True,0
male,54,1,0,55.4417,1,C116,C,False,1
female,43,1,0,55.4417,1,C116,C,False,1
female,52,1,0,78.2667,1,D20,C,False,1
male,,0,0,27.7208,1,?,C,True,0
female,62,0,0,80.0,1,B28,?,True,1
male,67,1,0,221.7792,1,C55 C57,S,False,0
female,63,1,0,221.7792,1,C55 C57,S,False,0
male,61,0,0,32.3208,1,D50,S,True,0
female,48,0,0,25.9292,1,D17,S,True,1
female,18,0,2,79.65,1,E68,S,False,1
male,52,1,1,79.65,1,E67,S,False,0
female,39,1,1,79.65,1,E67,S,False,1
male,48,1,0,52.0,1,C126,S,False,1
female,,1,0,52.0,1,C126,S,False,1
male,49,1,1,110.8833,1,C68,C,False,0
male,17,0,2,110.8833,1,C70,C,False,1
female,39,1,1,110.8833,1,C68,C,False,1
female,,0,0,79.2,1,?,C,True,1
male,31,0,0,28.5375,1,C53,C,True,1
male,40,0,0,27.7208,1,?,C,True,0
male,61,0,0,33.5,1,B19,S,True,0
male,47,0,0,34.0208,1,D46,S,True,0
female,35,0,0,512.3292,1,?,C,True,1
male,64,1,0,75.25,1,D37,C,False,0
female,60,1,0,75.25,1,D37,C,False,1
male,60,0,0,26.55,1,?,S,True,0
male,54,0,1,77.2875,1,D26,S,False,0
male,21,0,1,77.2875,1,D26,S,False,0
female,55,0,0,135.6333,1,C32,C,True,1
female,31,0,2,164.8667,1,C7,S,False,1
male,57,1,1,164.8667,1,?,S,False,0
female,45,1,1,164.8667,1,?,S,False,1
male,50,1,1,211.5,1,C80,C,False,0
male,27,0,2,211.5,1,C82,C,False,0
female,50,1,1,211.5,1,C80,C,False,1
female,21,0,0,26.55,1,?,S,True,1
male,51,0,1,61.3792,1,?,C,False,0
male,21,0,1,61.3792,1,?,C,False,1
male,,0,0,35.0,1,C128,S,True,0
female,31,0,0,134.5,1,E39 E41,C,True,1
male,,0,0,35.5,1,C52,S,True,1
male,62,0,0,26.55,1,?,S,True,0
female,36,0,0,135.6333,1,C32,C,True,1
male,30,1,0,24.0,2,?,C,False,0
female,28,1,0,24.0,2,?,C,False,1
male,30,0,0,13.0,2,?,S,True,0
male,18,0,0,11.5,2,?,S,True,0
male,25,0,0,10.5,2,?,S,True,0
male,34,1,0,26.0,2,?,S,False,0
female,36,1,0,26.0,2,?,S,False,1
male,57,0,0,13.0,2,?,S,True,0
male,18,0,0,11.5,2,?,S,True,0
male,23,0,0,10.5,2,?,S,True,0
female,36,0,0,13.0,2,D,S,True,1
male,28,0,0,10.5,2,?,S,True,0
male,51,0,0,12.525,2,?,S,True,0
male,32,1,0,26.0,2,?,S,False,1
female,19,1,0,26.0,2,?,S,False,1
male,28,0,0,26.0,2,?,S,True,0
male,1,2,1,39.0,2,F4,S,False,1
female,4,2,1,39.0,2,F4,S,False,1
female,12,2,1,39.0,2,F4,S,False,1
female,36,0,3,39.0,2,F4,S,False,1
male,34,0,0,13.0,2,D56,S,True,1
female,19,0,0,13.0,2,?,S,True,1
male,23,0,0,13.0,2,?,S,True,0
male,26,0,0,13.0,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,0
male,27,0,0,13.0,2,?,S,True,0
female,24,0,0,13.0,2,F33,S,True,1
female,15,0,2,39.0,2,?,S,False,1
male,60,1,1,39.0,2,?,S,False,0
female,40,1,1,39.0,2,?,S,False,1
female,20,1,0,26.0,2,?,S,False,1
male,25,1,0,26.0,2,?,S,False,0
female,36,0,0,13.0,2,?,S,True,1
male,25,0,0,13.0,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,0
female,42,0,0,13.0,2,?,S,True,1
male,0.8333,0,2,29.0,2,?,S,False,1
male,26,1,1,29.0,2,?,S,False,1
female,22,1,1,29.0,2,?,S,False,1
female,35,0,0,21.0,2,?,S,True,1
male,,0,0,0.0,2,?,S,True,0
male,19,0,0,13.0,2,?,S,True,0
female,44,1,0,26.0,2,?,S,False,0
male,54,1,0,26.0,2,?,S,False,0
male,52,0,0,13.5,2,?,S,True,0
male,37,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,0
female,25,1,1,30.0,2,?,S,False,1
female,45,0,2,30.0,2,?,S,False,1
male,29,1,0,26.0,2,?,S,False,0
female,28,1,0,26.0,2,?,S,False,1
male,29,0,0,10.5,2,?,S,True,0
male,28,0,0,13.0,2,?,S,True,0
male,24,0,0,10.5,2,?,S,True,1
female,8,0,2,26.25,2,?,S,False,1
male,31,1,1,26.25,2,?,S,False,0
female,31,1,1,26.25,2,?,S,False,1
female,22,0,0,10.5,2,F33,S,True,1
female,30,0,0,13.0,2,?,S,True,0
female,,0,0,21.0,2,?,S,True,0
male,21,0,0,11.5,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
male,8,1,1,36.75,2,?,S,False,1
male,18,0,0,73.5,2,?,S,True,0
female,48,0,2,36.75,2,?,S,False,1
female,28,0,0,13.0,2,?,S,True,1
male,32,0,0,13.0,2,?,S,True,0
male,17,0,0,73.5,2,?,S,True,0
male,29,1,0,27.7208,2,?,C,False,0
female,24,1,0,27.7208,2,?,C,False,1
male,25,0,0,31.5,2,?,S,True,0
male,18,0,0,73.5,2,?,S,True,0
female,18,0,1,23.0,2,?,S,False,1
female,34,0,1,23.0,2,?,S,False,1
male,54,0,0,26.0,2,?,S,True,0
male,8,0,2,32.5,2,?,S,False,1
male,42,1,1,32.5,2,?,S,False,0
female,34,1,1,32.5,2,?,S,False,1
female,27,1,0,13.8583,2,?,C,False,1
female,30,1,0,13.8583,2,?,C,False,1
male,23,0,0,13.0,2,?,S,True,0
male,21,0,0,13.0,2,?,S,True,0
male,18,0,0,13.0,2,?,S,True,0
male,40,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,18,0,0,10.5,2,?,S,True,0
male,36,0,0,13.0,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
female,38,0,0,13.0,2,?,S,True,0
male,35,0,0,26.0,2,?,S,True,0
male,38,1,0,21.0,2,?,S,False,0
male,34,1,0,21.0,2,?,S,False,0
female,34,0,0,13.0,2,?,S,True,1
male,16,0,0,26.0,2,?,S,True,0
male,26,0,0,10.5,2,?,S,True,0
male,47,0,0,10.5,2,?,S,True,0
male,21,1,0,11.5,2,?,S,False,0
male,21,1,0,11.5,2,?,S,False,0
male,24,0,0,13.5,2,?,S,True,0
male,24,0,0,13.0,2,?,S,True,0
male,34,0,0,13.0,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,52,0,0,13.0,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,0.6667,1,1,14.5,2,?,S,False,1
female,24,0,2,14.5,2,?,S,False,1
male,44,0,0,13.0,2,?,S,True,0
female,6,0,1,33.0,2,?,S,False,1
male,28,0,1,33.0,2,?,S,False,0
male,62,0,0,10.5,2,?,S,True,1
male,30,0,0,10.5,2,?,S,True,0
female,7,0,2,26.25,2,?,S,False,1
male,43,1,1,26.25,2,?,S,False,0
female,45,1,1,26.25,2,?,S,False,1
female,24,1,2,65.0,2,?,S,False,1
female,24,1,2,65.0,2,?,S,False,1
male,49,1,2,65.0,2,?,S,False,0
female,48,1,2,65.0,2,?,S,False,1
female,55,0,0,16.0,2,?,S,True,1
male,24,2,0,73.5,2,?,S,False,0
male,32,2,0,73.5,2,?,S,False,0
male,21,2,0,73.5,2,?,S,False,0
female,18,1,1,13.0,2,?,S,False,0
female,20,2,1,23.0,2,?,S,False,1
male,23,2,1,11.5,2,?,S,False,0
male,36,0,0,13.0,2,?,S,True,0
female,54,1,3,23.0,2,?,S,False,1
male,50,0,0,13.0,2,?,S,True,0
male,44,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,21,0,0,73.5,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,1
male,63,1,0,26.0,2,?,S,False,0
female,60,1,0,26.0,2,?,S,False,0
male,33,0,0,12.275,2,?,S,True,0
female,17,0,0,10.5,2,?,S,True,1
male,42,1,0,27.0,2,?,S,False,0
female,24,2,1,27.0,2,?,S,False,1
male,47,0,0,15.0,2,?,S,True,0
male,24,2,0,31.5,2,?,S,False,0
male,22,2,0,31.5,2,?,S,False,0
male,32,0,0,10.5,2,?,S,True,0
female,23,0,0,13.7917,2,D,C,True,1
male,34,1,0,26.0,2,?,S,False,0
female,24,1,0,26.0,2,?,S,False,1
female,22,0,0,21.0,2,?,S,True,0
female,,0,0,12.35,2,E101,Q,True,1
male,35,0,0,12.35,2,?,Q,True,0
female,45,0,0,13.5,2,?,S,True,1
male,57,0,0,12.35,2,?,Q,True,0
male,,0,0,0.0,2,?,S,True,0
male,31,0,0,10.5,2,?,S,True,0
female,26,1,1,26.0,2,?,S,False,0
male,30,1,1,26.0,2,?,S,False,0
male,,0,0,10.7083,2,?,Q,True,0
female,1,1,2,41.5792,2,?,C,False,1
female,3,1,2,41.5792,2,?,C,False,1
male,25,1,2,41.5792,2,?,C,False,0
female,22,1,2,41.5792,2,?,C,False,1
female,17,0,0,12.0,2,?,C,True,1
female,,0,0,33.0,2,?,S,True,1
female,34,0,0,10.5,2,F33,S,True,1
male,36,0,0,12.875,2,D,C,True,0
male,24,0,0,10.5,2,?,S,True,0
male,61,0,0,12.35,2,?,Q,True,0
male,50,1,0,26.0,2,?,S,False,0
female,42,1,0,26.0,2,?,S,False,1
female,57,0,0,10.5,2,E77,S,True,0
male,,0,0,15.0458,2,D,C,True,0
male,1,0,2,37.0042,2,?,C,False,1
male,31,1,1,37.0042,2,?,C,False,0
female,24,1,1,37.0042,2,?,C,False,1
male,,0,0,15.5792,2,?,C,True,0
male,30,0,0,13.0,2,?,S,True,0
male,40,0,0,16.0,2,?,S,True,0
male,32,0,0,13.5,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,46,0,0,26.0,2,?,S,True,0
female,13,0,1,19.5,2,?,S,False,1
female,41,0,1,19.5,2,?,S,False,1
male,19,0,0,10.5,2,?,S,True,1
male,39,0,0,13.0,2,?,S,True,0
male,48,0,0,13.0,2,?,S,True,0
male,70,0,0,10.5,2,?,S,True,0
male,27,0,0,13.0,2,?,S,True,0
male,54,0,0,14.0,2,?,S,True,0
male,39,0,0,26.0,2,?,S,True,0
male,16,0,0,10.5,2,?,S,True,0
male,62,0,0,9.6875,2,?,Q,True,0
male,32.5,1,0,30.0708,2,?,C,False,0
female,14,1,0,30.0708,2,?,C,False,1
male,2,1,1,26.0,2,F2,S,False,1
male,3,1,1,26.0,2,F2,S,False,1
male,36.5,0,2,26.0,2,F2,S,False,0
male,26,0,0,13.0,2,F2,S,True,0
male,19,1,1,36.75,2,?,S,False,0
male,28,0,0,13.5,2,?,S,True,0
male,20,0,0,13.8625,2,D38,C,True,1
female,29,0,0,10.5,2,F33,S,True,1
male,39,0,0,13.0,2,?,S,True,0
male,22,0,0,10.5,2,?,S,True,1
male,,0,0,13.8625,2,?,C,True,1
male,23,0,0,10.5,2,?,S,True,0
male,29,0,0,13.8583,2,?,C,True,1
male,28,0,0,10.5,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
female,50,0,1,26.0,2,?,S,False,1
male,19,0,0,10.5,2,?,S,True,0
male,,0,0,15.05,2,?,C,True,0
male,41,0,0,13.0,2,?,S,True,0
female,21,0,1,21.0,2,?,S,False,1
female,19,0,0,26.0,2,?,S,True,1
male,43,0,1,21.0,2,?,S,False,0
female,32,0,0,13.0,2,?,S,True,1
male,34,0,0,13.0,2,?,S,True,0
male,30,0,0,12.7375,2,?,C,True,1
male,27,0,0,15.0333,2,?,C,True,0
female,2,1,1,26.0,2,?,S,False,1
female,8,1,1,26.0,2,?,S,False,1
female,33,0,2,26.0,2,?,S,False,1
male,36,0,0,10.5,2,?,S,True,0
male,34,1,0,21.0,2,?,S,False,0
female,30,3,0,21.0,2,?,S,False,1
female,28,0,0,13.0,2,?,S,True,1
male,23,0,0,15.0458,2,?,C,True,0
male,0.8333,1,1,18.75,2,?,S,False,1
male,3,1,1,18.75,2,?,S,False,1
female,24,2,3,18.75,2,?,S,False,1
female,50,0,0,10.5,2,?,S,True,1
male,19,0,0,10.5,2,?,S,True,0
female,21,0,0,10.5,2,?,S,True,1
male,26,0,0,13.0,2,?,S,True,0
male,25,0,0,13.0,2,?,S,True,0
male,27,0,0,26.0,2,?,S,True,0
female,25,0,1,26.0,2,?,S,False,1
female,18,0,2,13.0,2,?,S,False,1
female,20,0,0,36.75,2,?,S,True,1
female,30,0,0,13.0,2,?,S,True,1
male,59,0,0,13.5,2,?,S,True,0
female,30,0,0,12.35,2,?,Q,True,1
male,35,0,0,10.5,2,?,S,True,0
female,40,0,0,13.0,2,?,S,True,1
male,25,0,0,13.0,2,?,S,True,0
male,41,0,0,15.0458,2,?,C,True,0
male,25,0,0,10.5,2,?,S,True,0
male,18.5,0,0,13.0,2,F,S,True,0
male,14,0,0,65.0,2,?,S,True,0
female,50,0,0,10.5,2,?,S,True,1
male,23,0,0,13.0,2,?,S,True,0
female,28,0,0,12.65,2,?,S,True,1
female,27,0,0,10.5,2,E101,S,True,1
male,29,1,0,21.0,2,?,S,False,0
female,27,1,0,21.0,2,?,S,False,0
male,40,0,0,13.0,2,?,S,True,0
female,31,0,0,21.0,2,?,S,True,1
male,30,1,0,21.0,2,?,S,False,0
male,23,1,0,10.5,2,?,S,False,0
female,31,0,0,21.0,2,?,S,True,1
male,,0,0,0.0,2,?,S,True,0
female,12,0,0,15.75,2,?,S,True,1
female,40,0,0,15.75,2,?,S,True,1
female,32.5,0,0,13.0,2,E101,S,True,1
male,27,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,2,1,1,23.0,2,?,S,False,1
female,4,1,1,23.0,2,?,S,False,1
female,29,0,2,23.0,2,?,S,False,1
female,0.9167,1,2,27.75,2,?,S,False,1
female,5,1,2,27.75,2,?,S,False,1
male,36,1,2,27.75,2,?,S,False,0
female,33,1,2,27.75,2,?,S,False,1
male,66,0,0,10.5,2,?,S,True,0
male,,0,0,12.875,2,?,S,True,0
male,31,0,0,13.0,2,?,S,True,1
male,,0,0,13.0,2,?,S,True,1
female,26,0,0,13.5,2,?,S,True,1
female,24,0,0,13.0,2,?,S,True,0
male,42,0,0,7.55,3,?,S,True,0
male,13,0,2,20.25,3,?,S,False,0
male,16,1,1,20.25,3,?,S,False,0
female,35,1,1,20.25,3,?,S,False,1
female,16,0,0,7.65,3,?,S,True,1
male,25,0,0,7.65,3,F G63,S,True,1
male,20,0,0,7.925,3,?,S,True,1
female,18,0,0,7.2292,3,?,C,True,1
male,30,0,0,7.25,3,?,S,True,0
male,26,0,0,8.05,3,?,S,True,0
female,40,1,0,9.475,3,?,S,False,0
male,0.8333,0,1,9.35,3,?,S,False,1
female,18,0,1,9.35,3,?,S,False,1
male,26,0,0,18.7875,3,?,C,True,1
male,26,0,0,7.8875,3,?,S,True,0
male,20,0,0,7.925,3,?,S,True,0
male,24,0,0,7.05,3,?,S,True,0
male,25,0,0,7.05,3,?,S,True,0
male,35,0,0,8.05,3,?,S,True,0
male,18,0,0,8.3,3,?,S,True,0
male,32,0,0,22.525,3,?,S,True,0
female,19,1,0,7.8542,3,?,S,False,1
male,4,4,2,31.275,3,?,S,False,0
female,6,4,2,31.275,3,?,S,False,0
female,2,4,2,31.275,3,?,S,False,0
female,17,4,2,7.925,3,?,S,False,1
female,38,4,2,7.775,3,?,S,False,0
female,9,4,2,31.275,3,?,S,False,0
female,11,4,2,31.275,3,?,S,False,0
male,39,1,5,31.275,3,?,S,False,0
male,27,0,0,7.7958,3,?,S,True,1
male,26,0,0,7.775,3,?,S,True,0
female,39,1,5,31.275,3,?,S,False,0
male,20,0,0,7.8542,3,?,S,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,25,1,0,17.8,3,?,S,False,0
female,18,1,0,17.8,3,?,S,False,0
male,24,0,0,7.775,3,?,S,True,0
male,35,0,0,7.05,3,?,S,True,0
male,5,4,2,31.3875,3,?,S,False,0
male,9,4,2,31.3875,3,?,S,False,0
male,3,4,2,31.3875,3,?,S,False,1
male,13,4,2,31.3875,3,?,S,False,0
female,5,4,2,31.3875,3,?,S,False,1
male,40,1,5,31.3875,3,?,S,False,0
male,23,0,0,7.7958,3,?,S,True,1
female,38,1,5,31.3875,3,?,S,False,1
female,45,0,0,7.225,3,?,C,True,1
male,21,0,0,7.225,3,?,C,True,0
male,23,0,0,7.05,3,?,S,True,0
female,17,0,0,14.4583,3,?,C,True,0
male,30,0,0,7.225,3,?,C,True,0
male,23,0,0,7.8542,3,?,S,True,0
female,13,0,0,7.2292,3,?,C,True,1
male,20,0,0,7.225,3,?,C,True,0
male,32,1,0,15.85,3,?,S,False,0
female,33,3,0,15.85,3,?,S,False,1
female,0.75,2,1,19.2583,3,?,C,False,1
female,0.75,2,1,19.2583,3,?,C,False,1
female,5,2,1,19.2583,3,?,C,False,1
female,24,0,3,19.2583,3,?,C,False,1
female,18,0,0,8.05,3,?,S,True,1
male,40,0,0,7.225,3,?,C,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,20,0,0,7.2292,3,?,C,True,1
female,18,0,1,14.4542,3,?,C,False,0
female,45,0,1,14.4542,3,?,C,False,0
female,27,0,0,7.8792,3,?,Q,True,0
male,22,0,0,8.05,3,?,S,True,0
male,19,0,0,8.05,3,?,S,True,0
male,26,0,0,7.775,3,?,S,True,0
male,22,0,0,9.35,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,20,0,0,4.0125,3,?,C,True,0
male,32,0,0,56.4958,3,?,S,True,1
male,21,0,0,7.775,3,?,S,True,0
male,18,0,0,7.75,3,?,S,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,6,1,1,15.2458,3,?,C,False,0
female,9,1,1,15.2458,3,?,C,False,0
male,,0,0,7.225,3,?,C,True,0
female,,0,2,15.2458,3,?,C,False,0
female,,0,2,7.75,3,?,Q,False,0
male,40,1,1,15.5,3,?,Q,False,0
female,32,1,1,15.5,3,?,Q,False,0
male,21,0,0,16.1,3,?,S,True,0
female,22,0,0,7.725,3,?,Q,True,1
female,20,0,0,7.8542,3,?,S,True,0
male,29,1,0,7.0458,3,?,S,False,0
male,22,1,0,7.25,3,?,S,False,0
male,22,0,0,7.7958,3,?,S,True,0
male,35,0,0,8.05,3,?,S,True,0
female,18.5,0,0,7.2833,3,?,Q,True,0
male,21,0,0,7.8208,3,?,Q,True,1
male,19,0,0,6.75,3,?,Q,True,0
female,18,0,0,7.8792,3,?,Q,True,0
female,21,0,0,8.6625,3,?,S,True,0
female,30,0,0,8.6625,3,?,S,True,0
male,18,0,0,8.6625,3,?,S,True,0
male,38,0,0,8.6625,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
female,21,0,0,7.75,3,?,Q,True,0
male,21,0,0,7.75,3,?,Q,True,0
male,21,0,0,8.05,3,?,S,True,0
male,,1,0,14.4583,3,?,C,False,0
female,,1,0,14.4583,3,?,C,False,0
male,28,0,0,7.7958,3,?,S,True,0
male,24,0,0,7.8542,3,?,S,True,0
female,16,0,0,7.75,3,?,Q,True,1
female,37,0,0,7.75,3,?,Q,True,0
male,28,0,0,7.25,3,?,S,True,0
male,24,0,0,8.05,3,?,S,True,0
male,21,0,0,7.7333,3,?,Q,True,0
male,32,0,0,56.4958,3,?,S,True,1
male,29,0,0,8.05,3,?,S,True,0
male,26,1,0,14.4542,3,?,C,False,0
male,18,1,0,14.4542,3,?,C,False,0
male,20,0,0,7.05,3,?,S,True,0
male,18,0,0,8.05,3,?,S,True,1
male,24,0,0,7.25,3,?,Q,True,0
male,36,0,0,7.4958,3,?,S,True,0
male,24,0,0,7.4958,3,?,S,True,0
male,31,0,0,7.7333,3,?,Q,True,0
male,31,0,0,7.75,3,?,Q,True,0
female,22,0,0,7.75,3,?,Q,True,1
female,30,0,0,7.6292,3,?,Q,True,0
male,70.5,0,0,7.75,3,?,Q,True,0
male,43,0,0,8.05,3,?,S,True,0
male,35,0,0,7.8958,3,?,S,True,0
male,27,0,0,7.8958,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
male,30,0,0,8.05,3,?,S,True,0
male,9,1,1,15.9,3,?,S,False,1
male,3,1,1,15.9,3,?,S,False,1
female,36,0,2,15.9,3,?,S,False,1
male,59,0,0,7.25,3,?,S,True,0
male,19,0,0,8.1583,3,?,S,True,0
female,17,0,1,16.1,3,?,S,False,1
male,44,0,1,16.1,3,?,S,False,0
male,17,0,0,8.6625,3,?,S,True,0
male,22.5,0,0,7.225,3,?,C,True,0
male,45,0,0,8.05,3,?,S,True,1
female,22,0,0,10.5167,3,?,S,True,0
male,19,0,0,10.1708,3,?,S,True,0
female,30,0,0,6.95,3,?,Q,True,1
male,29,0,0,7.75,3,?,Q,True,1
male,0.3333,0,2,14.4,3,?,S,False,0
male,34,1,1,14.4,3,?,S,False,0
female,28,1,1,14.4,3,?,S,False,0
male,27,0,0,7.8958,3,?,S,True,0
male,25,0,0,7.8958,3,?,S,True,0
male,24,2,0,24.15,3,?,S,False,0
male,22,0,0,8.05,3,?,S,True,0
male,21,2,0,24.15,3,?,S,False,0
male,17,2,0,8.05,3,?,S,False,0
male,,1,0,16.1,3,?,S,False,0
female,,1,0,16.1,3,?,S,False,1
male,36.5,1,0,17.4,3,?,S,False,1
female,36,1,0,17.4,3,?,S,False,1
male,30,0,0,9.5,3,?,S,True,1
male,16,0,0,9.5,3,?,S,True,0
male,1,1,2,20.575,3,?,S,False,1
female,0.1667,1,2,20.575,3,?,S,False,1
male,26,1,2,20.575,3,?,S,False,0
female,33,1,2,20.575,3,?,S,False,1
male,25,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,22,0,0,7.25,3,?,S,True,0
male,36,0,0,7.25,3,?,S,True,0
female,19,0,0,7.8792,3,?,Q,True,1
male,17,0,0,7.8958,3,?,S,True,0
male,42,0,0,8.6625,3,?,S,True,0
male,43,0,0,7.8958,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,32,0,0,7.75,3,?,Q,True,0
male,19,0,0,8.05,3,?,S,True,1
female,30,0,0,12.475,3,?,S,True,1
female,24,0,0,7.75,3,?,Q,True,0
female,23,0,0,8.05,3,?,S,True,1
male,33,0,0,7.8958,3,?,C,True,0
male,65,0,0,7.75,3,?,Q,True,0
male,24,0,0,7.55,3,?,S,True,1
male,23,1,0,13.9,3,?,S,False,0
female,22,1,0,13.9,3,?,S,False,1
male,18,0,0,7.775,3,?,S,True,0
male,16,0,0,7.775,3,?,S,True,0
male,45,0,0,6.975,3,?,S,True,0
male,,0,0,7.225,3,?,C,True,0
male,39,0,2,7.2292,3,?,C,False,0
male,17,1,1,7.2292,3,?,C,False,0
male,15,1,1,7.2292,3,?,C,False,0
male,47,0,0,7.25,3,?,S,True,0
female,5,0,0,12.475,3,?,S,True,1
male,,0,0,7.225,3,?,C,True,0
male,40.5,0,0,15.1,3,?,S,True,0
male,40.5,0,0,7.75,3,?,Q,True,0
male,,0,0,7.05,3,?,S,True,1
male,18,0,0,7.7958,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,6.95,3,?,Q,True,0
male,26,0,0,7.8792,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,56.4958,3,?,S,True,1
female,21,2,2,34.375,3,?,S,False,0
female,9,2,2,34.375,3,?,S,False,0
male,,0,0,8.05,3,?,S,True,0
male,18,2,2,34.375,3,?,S,False,0
male,16,1,3,34.375,3,?,S,False,0
female,48,1,3,34.375,3,?,S,False,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.25,3,?,S,True,0
male,25,0,0,7.7417,3,?,Q,True,0
male,,0,0,14.5,3,?,S,True,0
male,,0,0,7.8958,3,?,C,True,0
male,22,0,0,8.05,3,?,S,True,0
female,16,0,0,7.7333,3,?,Q,True,1
female,,0,0,7.75,3,?,Q,True,1
male,9,0,2,20.525,3,?,S,False,1
male,33,1,1,20.525,3,?,S,False,0
male,41,0,0,7.85,3,?,S,True,0
female,31,1,1,20.525,3,?,S,False,1
male,38,0,0,7.05,3,?,S,True,0
male,9,5,2,46.9,3,?,S,False,0
male,1,5,2,46.9,3,?,S,False,0
male,11,5,2,46.9,3,?,S,False,0
female,10,5,2,46.9,3,?,S,False,0
female,16,5,2,46.9,3,?,S,False,0
male,14,5,2,46.9,3,?,S,False,0
male,40,1,6,46.9,3,?,S,False,0
female,43,1,6,46.9,3,?,S,False,0
male,51,0,0,8.05,3,?,S,True,0
male,32,0,0,8.3625,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,20,0,0,9.8458,3,?,S,True,0
male,37,2,0,7.925,3,?,S,False,0
male,28,2,0,7.925,3,?,S,False,0
male,19,0,0,7.775,3,?,S,True,0
female,24,0,0,8.85,3,?,S,True,0
female,17,0,0,7.7333,3,?,Q,True,0
male,,1,0,19.9667,3,?,S,False,0
male,,1,0,19.9667,3,?,S,False,0
male,28,1,0,15.85,3,?,S,False,0
female,24,1,0,15.85,3,?,S,False,1
male,20,0,0,9.5,3,?,S,True,0
male,23.5,0,0,7.2292,3,?,C,True,0
male,41,2,0,14.1083,3,?,S,False,0
male,26,1,0,7.8542,3,?,S,False,0
male,21,0,0,7.8542,3,?,S,True,0
female,45,1,0,14.1083,3,?,S,False,1
female,,0,0,7.55,3,?,S,True,0
male,25,0,0,7.25,3,?,S,True,0
male,,0,0,6.8583,3,?,Q,True,0
male,11,0,0,18.7875,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,1
male,27,0,0,6.975,3,?,S,True,1
male,,0,0,56.4958,3,?,S,True,1
female,18,0,0,6.75,3,?,Q,True,0
female,26,0,0,7.925,3,?,S,True,1
female,23,0,0,7.925,3,?,S,True,0
female,22,0,0,8.9625,3,?,S,True,1
male,28,0,0,7.8958,3,?,S,True,0
female,28,0,0,7.775,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
female,2,0,1,12.2875,3,?,S,False,1
female,22,1,1,12.2875,3,?,S,False,1
male,43,0,0,6.45,3,?,S,True,0
male,28,0,0,22.525,3,?,S,True,0
female,27,0,0,7.925,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,0
female,,0,0,8.05,3,?,S,True,1
male,42,0,0,7.65,3,F G63,S,True,0
male,,0,0,7.8875,3,?,S,True,1
male,30,0,0,7.2292,3,?,C,True,0
male,,0,0,7.8958,3,?,S,True,0
female,27,1,0,7.925,3,?,S,False,0
female,25,1,0,7.925,3,?,S,False,0
male,,0,0,7.8958,3,?,S,True,0
male,29,0,0,7.8958,3,?,C,True,1
male,21,0,0,7.7958,3,?,S,True,1
male,,0,0,7.05,3,?,S,True,0
male,20,0,0,7.8542,3,?,S,True,0
male,48,0,0,7.8542,3,?,S,True,0
male,17,1,0,7.0542,3,?,S,False,0
female,,0,0,7.75,3,?,Q,True,1
male,,0,0,8.1125,3,?,S,True,1
male,34,0,0,6.4958,3,?,S,True,0
male,26,0,0,7.775,3,?,S,True,1
male,22,0,0,7.7958,3,?,S,True,0
male,33,0,0,8.6542,3,?,S,True,0
male,31,0,0,7.775,3,?,S,True,0
male,29,0,0,7.8542,3,?,S,True,0
male,4,1,1,11.1333,3,?,S,False,1
female,1,1,1,11.1333,3,?,S,False,1
male,49,0,0,0.0,3,?,S,True,0
male,33,0,0,7.775,3,?,S,True,0
male,19,0,0,0.0,3,?,S,True,0
female,27,0,2,11.1333,3,?,S,False,1
male,,1,2,23.45,3,?,S,False,0
female,,1,2,23.45,3,?,S,False,0
male,,1,2,23.45,3,?,S,False,0
female,,1,2,23.45,3,?,S,False,0
male,23,0,0,7.8958,3,?,S,True,0
male,32,0,0,7.8542,3,?,S,True,1
male,27,0,0,7.8542,3,?,S,True,0
female,20,1,0,9.825,3,?,S,False,0
female,21,1,0,9.825,3,?,S,False,0
male,32,0,0,7.925,3,?,S,True,1
male,17,0,0,7.125,3,?,S,True,0
male,21,0,0,8.4333,3,?,S,True,0
male,30,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.7958,3,?,S,True,1
male,33,0,0,7.8542,3,?,S,True,0
male,22,0,0,7.5208,3,?,S,True,0
female,4,0,1,13.4167,3,?,C,False,1
male,39,0,1,13.4167,3,?,C,False,1
male,,0,0,7.2292,3,?,C,True,0
male,18.5,0,0,7.2292,3,?,C,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.25,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.75,3,?,Q,True,1
male,34.5,0,0,7.8292,3,?,Q,True,0
male,44,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,1
male,,1,0,14.4542,3,?,C,False,0
female,,1,0,14.4542,3,?,C,False,0
male,,1,0,7.75,3,?,Q,False,0
male,,1,0,7.75,3,?,Q,False,0
male,,0,0,7.7375,3,?,Q,True,0
female,22,2,0,8.6625,3,?,S,False,0
male,26,2,0,8.6625,3,?,S,False,0
female,4,0,2,22.025,3,?,S,False,1
male,29,3,1,22.025,3,?,S,False,1
female,26,1,1,22.025,3,?,S,False,1
female,1,1,1,12.1833,3,?,S,False,0
male,18,1,1,7.8542,3,?,S,False,0
female,36,0,2,12.1833,3,?,S,False,0
male,,0,0,7.8958,3,?,C,True,0
male,25,0,0,7.2292,3,F E57,C,True,1
male,,0,0,7.225,3,?,C,True,0
female,37,0,0,9.5875,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,56.4958,3,?,S,True,1
male,,0,0,56.4958,3,?,S,True,0
female,22,0,0,7.25,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,0
male,26,0,0,56.4958,3,?,S,True,1
male,29,0,0,9.4833,3,?,S,True,0
male,29,0,0,7.775,3,?,S,True,0
male,22,0,0,7.775,3,?,S,True,0
male,22,0,0,7.225,3,?,C,True,1
male,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,0,4,25.4667,3,?,S,False,0
male,32,0,0,7.925,3,?,S,True,0
male,34.5,0,0,6.4375,3,?,C,True,0
female,,1,0,15.5,3,?,Q,False,0
male,,1,0,15.5,3,?,Q,False,0
male,36,0,0,0.0,3,?,S,True,0
male,39,0,0,24.15,3,?,S,True,0
male,24,0,0,9.5,3,?,S,True,0
female,25,0,0,7.775,3,?,S,True,0
female,45,0,0,7.75,3,?,S,True,0
male,36,1,0,15.55,3,?,S,False,0
female,30,1,0,15.55,3,?,S,False,0
male,20,1,0,7.925,3,?,S,False,1
male,,0,0,7.8792,3,?,Q,True,0
male,28,0,0,56.4958,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,30,1,0,16.1,3,?,S,False,0
female,26,1,0,16.1,3,?,S,False,0
male,,0,0,7.8792,3,?,S,True,0
male,20.5,0,0,7.25,3,?,S,True,0
male,27,0,0,8.6625,3,?,S,True,1
male,51,0,0,7.0542,3,?,S,True,0
female,23,0,0,7.8542,3,?,S,True,1
male,32,0,0,7.5792,3,?,S,True,1
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,1
male,24,0,0,7.1417,3,?,S,True,1
male,22,0,0,7.125,3,?,S,True,0
female,,0,0,7.8792,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,8.05,3,?,S,True,0
male,29,0,0,7.925,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,1
female,30.5,0,0,7.75,3,?,Q,True,0
female,,0,0,7.7375,3,?,Q,True,1
male,,0,0,7.2292,3,F E46,C,True,0
male,35,0,0,7.8958,3,?,C,True,0
male,33,0,0,7.8958,3,?,S,True,0
female,,0,0,7.225,3,?,C,True,1
male,,0,0,7.8958,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,1
male,,0,0,7.75,3,?,Q,True,1
female,,2,0,23.25,3,?,Q,False,1
female,,2,0,23.25,3,?,Q,False,1
male,,2,0,23.25,3,?,Q,False,1
female,,0,0,7.7875,3,?,Q,True,1
male,,0,0,15.5,3,?,Q,True,0
female,,0,0,7.8792,3,?,Q,True,1
female,15,0,0,8.0292,3,?,Q,True,1
female,35,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,24,1,0,16.1,3,?,S,False,0
female,19,1,0,16.1,3,?,S,False,0
female,,0,0,7.75,3,?,Q,True,0
female,,0,0,8.05,3,?,S,True,0
female,,0,0,8.05,3,?,S,True,0
male,55.5,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,21,0,0,7.775,3,?,S,True,1
male,,0,0,8.05,3,?,S,True,0
male,24,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.8958,3,?,S,True,0
male,28,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
female,,0,0,7.8792,3,?,Q,True,1
male,25,0,0,7.65,3,F G73,S,True,0
male,6,0,1,12.475,3,E121,S,False,1
female,27,0,1,12.475,3,E121,S,False,1
male,,0,0,8.05,3,?,S,True,0
female,,1,0,24.15,3,?,Q,False,1
male,,1,0,24.15,3,?,Q,False,0
male,,0,0,8.4583,3,?,Q,True,0
male,34,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.775,3,?,S,True,1
male,,1,1,15.2458,3,?,C,False,1
male,,1,1,15.2458,3,?,C,False,1
female,,0,2,15.2458,3,?,C,False,1
female,,0,0,7.2292,3,?,C,True,1
male,,0,0,8.05,3,?,S,True,0
female,,0,0,7.7333,3,?,Q,True,1
female,24,0,0,7.75,3,?,Q,True,1
male,,0,0,8.05,3,?,S,True,0
female,,1,0,15.5,3,?,Q,False,1
female,,1,0,15.5,3,?,Q,False,1
female,,0,0,15.5,3,?,Q,True,1
male,18,0,0,7.75,3,?,S,True,0
male,22,0,0,7.8958,3,?,S,True,0
================================================
FILE: benchmark/experiments/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from benchmark.experiments import structured_data
def get_experiments(task_name):
    """Return the experiments registered for a benchmark task.

    Args:
        task_name: Name of the benchmark task. The only task registered
            here is "structured_data_classification".

    Returns:
        A list of newly constructed experiment instances for the task.

    Raises:
        ValueError: If `task_name` is not a registered task.
    """
    if task_name == "structured_data_classification":
        return [
            structured_data.Titanic(),
            structured_data.Iris(),
            structured_data.Wine(),
        ]
    # Fail loudly: implicitly returning None only surfaces later as an
    # opaque "NoneType is not iterable" error when the caller loops over
    # the result.
    raise ValueError(
        "Unknown benchmark task: {!r}. Supported tasks: {}".format(
            task_name, ["structured_data_classification"]
        )
    )
================================================
FILE: benchmark/experiments/experiment.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import timeit
class Experiment(object):
    """Base class for one benchmark experiment.

    Subclasses provide the data (`load_data`) and the model to search
    (`get_auto_model`); this class times the fit, evaluates the result and
    removes the working directory afterwards.

    Args:
        name: Name of the experiment, stored on `self.name`.
        tmp_dir: Directory removed by `tear_down`. Subclasses in this
            package pass it to the auto model as its working directory.
    """

    def __init__(self, name, tmp_dir="tmp_dir"):
        self.name = name
        self.tmp_dir = tmp_dir

    def get_auto_model(self):
        """Return the model object to fit; must be overridden."""
        raise NotImplementedError

    @staticmethod
    def load_data():
        """Return `((x_train, y_train), (x_test, y_test))`; must be overridden."""
        raise NotImplementedError

    def run_once(self):
        """Fit a fresh model once and evaluate it.

        Returns:
            A tuple `(total_time, accuracy)`: the wall-clock duration of the
            `fit` call in seconds and element 1 of the `evaluate` result.
        """
        (x_train, y_train), (x_test, y_test) = self.load_data()
        auto_model = self.get_auto_model()
        start_time = timeit.default_timer()
        auto_model.fit(x_train, y_train)
        stop_time = timeit.default_timer()
        # Index 1 is taken as the metric value; index 0 is presumably the
        # loss -- confirm against the model's `evaluate` contract.
        accuracy = auto_model.evaluate(x_test, y_test)[1]
        total_time = stop_time - start_time
        return total_time, accuracy

    def run(self, repeat_times=1):
        """Run the experiment `repeat_times` times.

        Returns:
            A tuple `(total_times, metric_values)` of two lists with one
            entry per repetition.
        """
        total_times = []
        metric_values = []
        for _ in range(repeat_times):
            try:
                total_time, metric = self.run_once()
            finally:
                # Clean up even when a repetition fails so the working
                # directory is never left behind.
                self.tear_down()
            total_times.append(total_time)
            metric_values.append(metric)
        return total_times, metric_values

    def tear_down(self):
        """Remove `self.tmp_dir`; a directory that does not exist is ignored."""
        try:
            shutil.rmtree(self.tmp_dir)
        except FileNotFoundError:
            # Nothing to clean up (e.g. the model never wrote to disk or a
            # previous tear_down already removed it).
            pass
================================================
FILE: benchmark/experiments/image.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras import datasets
import autokeras as ak
from benchmark.experiments import experiment
class ImageClassifierExperiment(experiment.Experiment):
    """Experiment whose searched model is an `ak.ImageClassifier`."""

    def get_auto_model(self):
        """Create an image classifier that works inside `self.tmp_dir`."""
        classifier = ak.ImageClassifier(
            directory=self.tmp_dir, overwrite=True, max_trials=10
        )
        return classifier
class MNIST(ImageClassifierExperiment):
    """Image classification experiment on the Keras MNIST dataset."""

    def __init__(self):
        super().__init__("MNIST")

    @staticmethod
    def load_data():
        """Return whatever `keras.datasets.mnist.load_data()` yields."""
        mnist_splits = datasets.mnist.load_data()
        return mnist_splits
class CIFAR10(ImageClassifierExperiment):
    """Image classification experiment on the Keras CIFAR-10 dataset."""

    def __init__(self):
        super().__init__("CIFAR10")

    @staticmethod
    def load_data():
        """Return whatever `keras.datasets.cifar10.load_data()` yields."""
        cifar_splits = datasets.cifar10.load_data()
        return cifar_splits
================================================
FILE: benchmark/experiments/structured_data.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras
import numpy as np
import pandas as pd
import sklearn
import autokeras as ak
from benchmark.experiments import experiment
class StructuredDataClassifierExperiment(experiment.Experiment):
    """Experiment whose searched model is an `ak.StructuredDataClassifier`."""

    def get_auto_model(self):
        """Create a structured-data classifier working inside `self.tmp_dir`."""
        classifier = ak.StructuredDataClassifier(
            directory=self.tmp_dir, overwrite=True, max_trials=10
        )
        return classifier
class Titanic(StructuredDataClassifierExperiment):
    """Structured-data classification experiment on the Titanic CSV files."""

    def __init__(self):
        super().__init__("Titanic")

    @staticmethod
    def load_data():
        """Download the train/eval CSV files.

        Returns:
            `((train_csv_path, "survived"), (eval_csv_path, "survived"))`,
            i.e. each split is a local file path paired with the name of
            the label column.
        """
        label_column = "survived"
        train_csv = keras.utils.get_file(
            "titanic_train.csv",
            "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
        )
        eval_csv = keras.utils.get_file(
            "titanic_eval.csv",
            "https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
        )
        return (train_csv, label_column), (eval_csv, label_column)
class Iris(StructuredDataClassifierExperiment):
    """Structured-data classification experiment on the Iris CSV files."""

    def __init__(self):
        super().__init__("Iris")

    @staticmethod
    def load_data():
        """Download the training/test CSV files.

        Returns:
            `((train_csv_path, "virginica"), (test_csv_path, "virginica"))`,
            i.e. each split is a local file path paired with the label
            column name used by this experiment.
        """
        url_root = "https://storage.googleapis.com/download.tensorflow.org/data/"
        label_column = "virginica"
        # Download order is kept: training file first, then the test file.
        train_csv = keras.utils.get_file(
            "iris_train.csv", url_root + "iris_training.csv"
        )
        test_csv = keras.utils.get_file(
            "iris_test.csv", url_root + "iris_test.csv"
        )
        return (train_csv, label_column), (test_csv, label_column)
class Wine(StructuredDataClassifierExperiment):
    """Structured-data classification experiment on the UCI wine data file."""

    def __init__(self):
        super().__init__("Wine")

    @staticmethod
    def load_data():
        """Download the data file, shuffle it and split it 80/20.

        Column 0 is used as the label and the remaining columns as
        features.

        Returns:
            `((x_train, y_train), (x_test, y_test))` as pandas objects.
        """
        source_url = (
            "https://archive.ics.uci.edu/ml/"
            "machine-learning-databases/wine/wine.data"
        )
        local_csv = keras.utils.get_file("wine.csv", source_url)
        # Fixed seed so every run shuffles the rows the same way.
        shuffled = pd.read_csv(local_csv, header=None).sample(
            frac=1, random_state=5
        )
        n_train = int(shuffled.shape[0] * 0.8)
        x_train = shuffled.iloc[:n_train, 1:]
        y_train = shuffled.iloc[:n_train, 0]
        x_test = shuffled.iloc[n_train:, 1:]
        y_test = shuffled.iloc[n_train:, 0]
        return (x_train, y_train), (x_test, y_test)
class StructuredDataRegressorExperiment(experiment.Experiment):
    """Experiment whose searched model is an `ak.StructuredDataRegressor`."""

    def get_auto_model(self):
        """Create a structured-data regressor working inside `self.tmp_dir`."""
        regressor = ak.StructuredDataRegressor(
            directory=self.tmp_dir, overwrite=True, max_trials=10
        )
        return regressor
class CaliforniaHousing(StructuredDataRegressorExperiment):
    """Structured-data regression experiment on California housing data.

    Args:
        name: Experiment name. Defaults to "CaliforniaHousing" so the class
            can be constructed without arguments like the other
            experiments in this module.
        tmp_dir: Working directory forwarded to the base class.
    """

    def __init__(self, name="CaliforniaHousing", tmp_dir="tmp_dir"):
        # The inherited constructor requires `name`; supplying a default
        # keeps existing `CaliforniaHousing(name, tmp_dir)` calls working
        # while making the no-argument form valid.
        super().__init__(name=name, tmp_dir=tmp_dir)

    @staticmethod
    def load_data():
        """Fetch the dataset and split it 80/20 with a fixed seed.

        Returns:
            `((x_train, y_train), (x_test, y_test))`.
        """
        # A bare `import sklearn` does not guarantee that these
        # subpackages are loaded, so import them explicitly.
        from sklearn import datasets
        from sklearn import model_selection

        house_dataset = datasets.fetch_california_housing()
        (
            x_train,
            x_test,
            y_train,
            y_test,
        ) = model_selection.train_test_split(
            house_dataset.data,
            np.array(house_dataset.target),
            test_size=0.2,
            random_state=42,
        )
        return (x_train, y_train), (x_test, y_test)
================================================
FILE: benchmark/experiments/text.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import keras
import numpy as np
from sklearn.datasets import load_files
import autokeras as ak
from benchmark.experiments import experiment
class IMDB(experiment.Experiment):
    """Text classification experiment on the aclImdb review archive."""

    def __init__(self):
        super().__init__("IMDB")

    def get_auto_model(self):
        """Create a text classifier that works inside `self.tmp_dir`."""
        classifier = ak.TextClassifier(
            directory=self.tmp_dir, overwrite=True, max_trials=10
        )
        return classifier

    @staticmethod
    def load_data():
        """Download and extract the archive, then load both splits.

        Returns:
            `((x_train, y_train), (x_test, y_test))` as NumPy arrays built
            from the `data` and `target` fields of `load_files`.
        """
        archive_url = (
            "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
        )
        archive_path = keras.utils.get_file(
            fname="aclImdb.tar.gz", origin=archive_url, extract=True
        )
        # The dataset directory is looked up next to the downloaded file.
        imdb_dir = os.path.join(os.path.dirname(archive_path), "aclImdb")
        label_dirs = ["pos", "neg"]

        def _read_split(split_name, shuffle):
            bunch = load_files(
                os.path.join(imdb_dir, split_name),
                shuffle=shuffle,
                categories=label_dirs,
            )
            return np.array(bunch.data), np.array(bunch.target)

        # Only the training split is shuffled.
        train_split = _read_split("train", True)
        test_split = _read_split("test", False)
        return train_split, test_split
================================================
FILE: benchmark/performance.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import keras
import numpy as np
from keras.datasets import cifar10
from keras.datasets import mnist
from sklearn.datasets import load_files
import autokeras as ak
def imdb_raw(num_instances=100):
    """Load the aclImdb reviews, optionally truncated.

    Args:
        num_instances: Number of leading examples kept from each of the
            four returned arrays. `None` keeps everything.

    Returns:
        `((x_train, y_train), (x_test, y_test))` as NumPy arrays built from
        the `data` and `target` fields of `load_files`.
    """
    archive_path = keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True,
    )
    # The dataset directory is looked up next to the downloaded file.
    imdb_dir = os.path.join(os.path.dirname(archive_path), "aclImdb")
    label_dirs = ["pos", "neg"]

    # Only the training split is shuffled.
    train_bunch = load_files(
        os.path.join(imdb_dir, "train"), shuffle=True, categories=label_dirs
    )
    test_bunch = load_files(
        os.path.join(imdb_dir, "test"), shuffle=False, categories=label_dirs
    )

    arrays = [
        np.array(train_bunch.data),
        np.array(train_bunch.target),
        np.array(test_bunch.data),
        np.array(test_bunch.target),
    ]
    if num_instances is not None:
        arrays = [values[:num_instances] for values in arrays]

    x_train, y_train, x_test, y_test = arrays
    return (x_train, y_train), (x_test, y_test)
def test_mnist_accuracy_over_98(tmp_path):
    """One-trial ImageClassifier on MNIST must score at least 0.98."""
    train_split, test_split = mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.ImageClassifier(directory=tmp_path, max_trials=1)
    classifier.fit(x_train, y_train, epochs=10)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.98
def test_cifar10_accuracy_over_93(tmp_path):
    """Three-trial ImageClassifier on CIFAR-10 must score at least 0.93."""
    train_split, test_split = cifar10.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.ImageClassifier(directory=tmp_path, max_trials=3)
    classifier.fit(x_train, y_train, epochs=5)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.93
def test_imdb_accuracy_over_92(tmp_path):
    """Three-trial TextClassifier on the full IMDB data must reach 0.92."""
    train_split, test_split = imdb_raw(num_instances=None)
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.TextClassifier(directory=tmp_path, max_trials=3)
    classifier.fit(x_train, y_train, epochs=1, batch_size=6)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.92
def test_titaninc_accuracy_over_77(tmp_path):
    """Ten-trial StructuredDataClassifier on Titanic must reach 0.77."""
    label_column = "survived"
    train_csv = keras.utils.get_file(
        "train.csv",
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
    )
    eval_csv = keras.utils.get_file(
        "eval.csv",
        "https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
    )

    classifier = ak.StructuredDataClassifier(directory=tmp_path, max_trials=10)
    classifier.fit(train_csv, label_column)

    scores = classifier.evaluate(eval_csv, label_column)
    assert scores[1] >= 0.77
================================================
FILE: benchmark/run.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import statistics
import sys
from benchmark import experiments as exp_module
def generate_report(experiments, repeat_times=10):
    """Run every experiment and summarise the results as CSV text.

    Args:
        experiments: Iterable of objects exposing a `name` attribute and a
            `run(repeat_times=...)` method that returns
            `(total_times, metric_values)`.
        repeat_times: How many times each experiment is repeated.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A string with one header line followed by one line per experiment:
        name, mean time, mean metric and sample standard deviation of the
        metric, separated by commas and joined with newlines.
    """
    header = ",".join(
        [
            "dataset_name",
            "average_time",
            "metrics_average",
            "metrics_standard_deviation",
        ]
    )
    report = [header]
    for experiment in experiments:
        total_times, metric_values = experiment.run(repeat_times=repeat_times)
        mean_time = statistics.mean(total_times)
        mean = statistics.mean(metric_values)
        # statistics.stdev raises for fewer than two samples; a single
        # repetition has no spread to report.
        if len(metric_values) > 1:
            std = statistics.stdev(metric_values)
        else:
            std = 0.0
        report.append(
            ",".join([experiment.name, str(mean_time), str(mean), str(std)])
        )
    return "\n".join(report)
def main(argv):
    """Run the benchmark for one task and write the report to a file.

    Args:
        argv: Command-line style argument list: `argv[1]` is the task name
            and `argv[2]` is the path of the report file to write.

    Raises:
        SystemExit: If fewer than two arguments follow the program name.
    """
    if len(argv) < 3:
        program = argv[0] if argv else "run.py"
        sys.exit("usage: {} <task_name> <report_path>".format(program))
    # Read from the `argv` parameter rather than the global `sys.argv` so
    # the function honours the arguments it is actually given.
    task = argv[1]
    path = argv[2]
    report = generate_report(exp_module.get_experiments(task))
    with open(path, "w") as file:
        file.write(report)


if __name__ == "__main__":
    main(sys.argv)
================================================
FILE: codecov.yml
================================================
coverage:
status:
project:
default:
target: 100%
patch:
default:
target: 100%
================================================
FILE: docker/Dockerfile
================================================
# The base image tag can be overridden at build time with
# `--build-arg TF_VERSION=<tag>`; the default keeps the tag that used to be
# hard-coded here.
ARG TF_VERSION=2.3.0
FROM tensorflow/tensorflow:${TF_VERSION}

# Copy the build context into the image and install it in editable mode,
# so the build context must be the project root.
WORKDIR /opt/autokeras
COPY . .
RUN python -m pip install --no-cache-dir --editable .

# Default working directory for containers started from this image.
WORKDIR /work
================================================
FILE: docker/Makefile
================================================
# Convenience targets for building and running the AutoKeras Docker image.
# Intended to be run from this directory; `make help` prints this file.
.PHONY: help build bash ipython notebook test

help:
	@cat Makefile

DATA?="${HOME}/Data"
GPUS?=all
DOCKER_FILE?=Dockerfile
DOCKER=docker
TF_VERSION=2.3.0
TEST=tests/
# Parent of the current directory, i.e. the project root when make is run
# from this directory.
SRC?=$(shell dirname `pwd`)

# The Dockerfile copies its build context and pip-installs it, so the
# context has to be the project root (SRC), not this directory.
build:
	$(DOCKER) build -t autokeras --build-arg TF_VERSION=$(TF_VERSION) -f $(DOCKER_FILE) $(SRC)

bash: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras bash

# A bare `--env` in front of the image name made docker treat `autokeras`
# as an environment variable and the following word as the image; it is
# dropped from the targets below.
ipython: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras ipython

# NOTE(review): this runs the image's default command; confirm that the
# image actually starts a notebook server.
notebook: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data --net=host autokeras

test: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras py.test $(TEST)
================================================
FILE: docker/README.md
================================================
# Using Autokeras via Docker
This directory contains `Dockerfile` to make it easy to get up and running with
Autokeras via [Docker](http://www.docker.com/).
## Installing Docker
General installation instructions are
[on the Docker site](https://docs.docker.com/installation/), but we give some
quick links here:
* [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox)
* [ubuntu](https://docs.docker.com/installation/ubuntulinux/)
## Running the container
We are using `Makefile` to simplify docker commands within make commands.
Build the container and start a Jupyter Notebook
$ make notebook
Build the container and start an IPython shell
$ make ipython
Build the container and start a bash shell
$ make bash
For GPU support install NVIDIA drivers (ideally latest) and
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using
$ make notebook GPUS=all # or [ipython, bash]
Mount a volume for external data sets
$ make DATA=~/mydata
Prints all make tasks
$ make help
Note: If you have a problem running nvidia-docker, you can fall back to the older
approach shown below, although it is not recommended. If you find a bug in
nvidia-docker, please report it to that project and keep using nvidia-docker as described above.
$ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
$ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu
================================================
FILE: docker/devel.Dockerfile
================================================
FROM ubuntu:18.04

# WORKDIR creates the directory when it is missing, so no separate mkdir
# layer is needed.
WORKDIR /app

# Packages are listed alphabetically, one per line, to keep diffs small.
# The apt lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        gcc \
        git \
        python3-distutils \
        python3.6 \
        python3.6-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# The generic get-pip.py no longer supports Python 3.6; use the
# version-specific bootstrap script. -f makes curl fail on HTTP errors
# instead of saving an error page as the script.
RUN curl -fsSLO https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
    python3.6 get-pip.py && \
    rm get-pip.py

# Clone, install and delete the checkout in a single layer: removing the
# sources in a later RUN would not shrink the image.
RUN git clone --depth 1 https://github.com/keras-team/autokeras.git && \
    python3.6 -m pip install --no-cache-dir -U keras && \
    python3.6 -m pip install --no-cache-dir -U ./autokeras && \
    rm -rf autokeras

# Exec form, so bash runs directly rather than under `/bin/sh -c`.
CMD ["/bin/bash"]
================================================
FILE: docker/pre-commit.Dockerfile
================================================
FROM python:3.7

# Lint/format tools used by the pre-commit script. --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN pip install --no-cache-dir flake8 black isort

# No sources are copied into the image; presumably the repository is
# bind-mounted at /autokeras when the container runs -- confirm against the
# invoking script.
WORKDIR /autokeras
CMD ["python", "docker/pre_commit.py"]
================================================
FILE: docker/pre_commit.py
================================================
from subprocess import CalledProcessError
from subprocess import check_call
def check_bash_call(string):
    """Run `string` with `bash -c`.

    Raises:
        CalledProcessError: If the command exits with a non-zero status.
    """
    command = ["bash", "-c", string]
    check_call(command)
def _run_format_and_flake8():
    """Lint the project and reformat it when linting fails.

    When the lint script succeeds, a short message is printed and the
    function returns. When it fails, the format script is run, the user is
    told to re-add and re-commit, and the process exits with status 1. A
    failure of the format script itself propagates as CalledProcessError.
    """
    try:
        check_bash_call("sh shell/lint.sh")
    except CalledProcessError:
        lint_passed = False
    else:
        lint_passed = True

    if lint_passed:
        print("No formatting needed.")
        return

    # Lint failed: rewrite the files, then stop so the changes get
    # reviewed and committed.
    check_bash_call("sh shell/format.sh")
    print("Some files have changed.")
    print("Please do git add and git commit again")
    exit(1)
def run_format_and_flake8():
    """Run the lint/format step and turn a failed command into an exit code.

    If a command run by `_run_format_and_flake8` fails, its return code is
    printed and used as this process's exit status.
    """
    try:
        _run_format_and_flake8()
    except CalledProcessError as failure:
        status = failure.returncode
        print("Pre-commit returned exit code", status)
        exit(status)
if __name__ == "__main__":
run_format_and_flake8()
================================================
FILE: docs/README.md
================================================
# AutoKeras Documentation
The source for AutoKeras documentation is in this directory.
Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org).
## Building the documentation
- Install dependencies: `pip install -r docs/requirements.txt`
- `pip install -e .` to make sure that Python will import your modified version of AutoKeras.
- From the root directory, `cd` into the `docs/` folder and run:
- `python autogen.py`
- `mkdocs serve` # Starts a local webserver: [localhost:8000](http://localhost:8000)
- `mkdocs build` # Builds a static site in `site/` directory
## Generate contributors list
- Prerequisites:
- Install Pillow: `pip install Pillow`
- Generate:
- Run: `sh shell/contributors.sh`
- The generated file is: `docs/templates/img/contributors.svg`
================================================
FILE: docs/autogen.py
================================================
import os
import pathlib
import shutil
import keras_autodoc
import tutobooks
# Maps each output Markdown page to the dotted names of the objects listed
# on it. Passed as the first argument to
# keras_autodoc.DocumentationGenerator in generate().
PAGES = {
    # Task API pages: the class plus its fit/predict/evaluate/export_model
    # methods.
    "image_classifier.md": [
        "autokeras.ImageClassifier",
        "autokeras.ImageClassifier.fit",
        "autokeras.ImageClassifier.predict",
        "autokeras.ImageClassifier.evaluate",
        "autokeras.ImageClassifier.export_model",
    ],
    "image_regressor.md": [
        "autokeras.ImageRegressor",
        "autokeras.ImageRegressor.fit",
        "autokeras.ImageRegressor.predict",
        "autokeras.ImageRegressor.evaluate",
        "autokeras.ImageRegressor.export_model",
    ],
    "text_classifier.md": [
        "autokeras.TextClassifier",
        "autokeras.TextClassifier.fit",
        "autokeras.TextClassifier.predict",
        "autokeras.TextClassifier.evaluate",
        "autokeras.TextClassifier.export_model",
    ],
    "text_regressor.md": [
        "autokeras.TextRegressor",
        "autokeras.TextRegressor.fit",
        "autokeras.TextRegressor.predict",
        "autokeras.TextRegressor.evaluate",
        "autokeras.TextRegressor.export_model",
    ],
    "structured_data_classifier.md": [
        "autokeras.StructuredDataClassifier",
        "autokeras.StructuredDataClassifier.fit",
        "autokeras.StructuredDataClassifier.predict",
        "autokeras.StructuredDataClassifier.evaluate",
        "autokeras.StructuredDataClassifier.export_model",
    ],
    "structured_data_regressor.md": [
        "autokeras.StructuredDataRegressor",
        "autokeras.StructuredDataRegressor.fit",
        "autokeras.StructuredDataRegressor.predict",
        "autokeras.StructuredDataRegressor.evaluate",
        "autokeras.StructuredDataRegressor.export_model",
    ],
    "auto_model.md": [
        "autokeras.AutoModel",
        "autokeras.AutoModel.fit",
        "autokeras.AutoModel.predict",
        "autokeras.AutoModel.evaluate",
        "autokeras.AutoModel.export_model",
    ],
    # Base classes.
    "base.md": [
        "autokeras.Node",
        "autokeras.Block",
        "autokeras.Block.build",
        "autokeras.Head",
    ],
    # Input nodes.
    "node.md": [
        "autokeras.ImageInput",
        "autokeras.Input",
        "autokeras.TextInput",
        "autokeras.StructuredDataInput",
    ],
    # Blocks, preprocessing blocks and heads share one page.
    "block.md": [
        "autokeras.ConvBlock",
        "autokeras.DenseBlock",
        "autokeras.Embedding",
        "autokeras.Merge",
        "autokeras.ResNetBlock",
        "autokeras.RNNBlock",
        "autokeras.SpatialReduction",
        "autokeras.TemporalReduction",
        "autokeras.XceptionBlock",
        "autokeras.StructuredDataBlock",
        "autokeras.ImageBlock",
        "autokeras.TextBlock",
        "autokeras.ImageAugmentation",
        "autokeras.Normalization",
        "autokeras.ClassificationHead",
        "autokeras.RegressionHead",
    ],
}
# Dotted names handed to the documentation generator as `extra_aliases`
# in generate().
aliases_needed = [
    "keras.callbacks.Callback",
    "keras.losses.Loss",
    "keras.metrics.Metric",
]
# NOTE(review): not referenced in the visible part of this file -- confirm
# whether it is still needed.
ROOT = "http://autokeras.com/"
# Two levels above this file: the directory that contains `docs/`.
autokeras_dir = pathlib.Path(__file__).resolve().parents[1]
def py_to_nb_md(dest_dir):
    """Convert the tutorial scripts in `py/` to notebooks and Markdown pages.

    For every `*.py` file in `py/` (relative to the current working
    directory), `tutobooks.py_to_md` is asked to produce
    `ipynb/<name>.ipynb` and `<dest_dir>/tutorial/<name>.md`. The Markdown
    file is then rewritten with a line of Colab / GitHub links in front of
    its content.

    Args:
        dest_dir: Directory that contains the `tutorial` output folder.
    """
    scripts_dir = "py"
    github_repo_dir = "keras-team/autokeras/blob/master/docs/"
    for entry in os.listdir("py/"):
        stem, suffix = os.path.splitext(entry)
        if suffix != ".py":
            continue

        script_path = os.path.join(scripts_dir, entry)
        notebook_path = os.path.join("ipynb", stem + ".ipynb")
        markdown_path = os.path.join(dest_dir, "tutorial", stem + ".md")
        tutobooks.py_to_md(
            script_path, notebook_path, markdown_path, "templates/img"
        )

        colab_link = (
            ":material-link: "
            "[**View in Colab**](https://colab.research.google.com/github/"
            + github_repo_dir
            + "ipynb/"
            + stem
            + ".ipynb"
            + ") "
        )
        github_link = (
            ":octicons-mark-github-16: "
            "[**GitHub source**](https://github.com/"
            + github_repo_dir
            + "py/"
            + stem
            + ".py)"
        )
        banner = colab_link + github_link + "\n"

        # Read the generated page, then write it back with the banner and
        # a blank line in front.
        with open(markdown_path, "r") as page:
            generated = page.read()
        with open(markdown_path, "w") as page:
            page.write(banner + "\n" + generated)
def generate(dest_dir):
    """Generate the documentation sources into `dest_dir`.

    Runs the keras_autodoc generator for `PAGES`, builds `index.md` from
    the index template and the project README, copies the contributing
    guide, and converts the tutorial scripts.

    Args:
        dest_dir: `pathlib.Path` of the output directory (it is combined
            with file names using the `/` operator).
    """
    template_dir = autokeras_dir / "docs" / "templates"
    doc_generator = keras_autodoc.DocumentationGenerator(
        PAGES,
        "https://github.com/keras-team/autokeras/blob/master",
        template_dir,
        autokeras_dir / "examples",
        extra_aliases=aliases_needed,
    )
    doc_generator.generate(dest_dir)

    # Read with the same encoding the index is written with, so the result
    # does not depend on the platform's default encoding.
    readme = (autokeras_dir / "README.md").read_text(encoding="utf-8")
    index = (template_dir / "index.md").read_text(encoding="utf-8")
    # Keep the README from its first "##" heading onwards. If there is no
    # such heading, use the whole README: slicing from -1 would keep only
    # its last character.
    heading_start = readme.find("##")
    readme_body = readme[heading_start:] if heading_start != -1 else readme
    index = index.replace("{{autogenerated}}", readme_body)
    (dest_dir / "index.md").write_text(index, encoding="utf-8")

    shutil.copyfile(
        autokeras_dir / ".github" / "CONTRIBUTING.md",
        dest_dir / "contributing.md",
    )
    py_to_nb_md(dest_dir)
if __name__ == "__main__":
generate(autokeras_dir / "docs" / "sources")
================================================
FILE: docs/ipynb/customized.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import keras\n",
"import numpy as np\n",
"import tree\n",
"from keras.datasets import mnist\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"In this tutorial, we show how to customize your search space with\n",
"[AutoModel](/auto_model/#automodel-class) and how to implement your own block\n",
"as search space. This API is mainly for advanced users who already know what\n",
"their model should look like.\n",
"\n",
"## Customized Search Space\n",
"First, let us see how we can build the following neural network using the\n",
"building blocks in AutoKeras.\n",
"\n",
"
\n",
"\n",
"We can make use of the [AutoModel](/auto_model/#automodel-class) API in\n",
"AutoKeras to implement it as follows.\n",
"The usage is the same as the [Keras functional\n",
"API](https://keras.io/api/models/model/#with-the-functional-api).\n",
"Since this is just a demo, we use small amount of `max_trials` and `epochs`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.ImageInput()\n",
"output_node = ak.Normalization()(input_node)\n",
"output_node1 = ak.ConvBlock()(output_node)\n",
"output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n",
"output_node = ak.Merge()([output_node1, output_node2])\n",
"output_node = ak.ClassificationHead()(output_node)\n",
"\n",
"auto_model = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"While building the model, the blocks used need to follow this topology:\n",
"`Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation`\n",
"are `Preprocessor`s.\n",
"`ClassificationHead` is `Head`. The rest are `Block`s.\n",
"\n",
"In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version\n",
"of ResNet to use. There are many other arguments to specify for each building\n",
"block. For most of the arguments, if not specified, they would be tuned\n",
"automatically. Please refer to the documentation links at the bottom of the\n",
"page for more details.\n",
"\n",
"Then, we prepare some data to run the model.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"print(x_train.shape) # (60000, 28, 28)\n",
"print(y_train.shape) # (60000,)\n",
"print(y_train[:3]) # array([7, 2, 1], dtype=uint8)\n",
"\n",
"# Feed the AutoModel with training data.\n",
"auto_model.fit(x_train[:100], y_train[:100], epochs=1)\n",
"# Predict with the best model.\n",
"predicted_y = auto_model.predict(x_test)\n",
"# Evaluate the best model with testing data.\n",
"print(auto_model.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"For multiple input nodes and multiple heads search space, you can refer to\n",
"[this section](/tutorial/multi/#customized-search-space).\n",
"\n",
"## Validation Data\n",
"If you would like to provide your own validation data or change the ratio of\n",
"the validation data, please refer to the Validation Data section of the\n",
"tutorials of [Image\n",
"Classification](/tutorial/image_classification/#validation-data), [Text\n",
"Classification](/tutorial/text_classification/#validation-data), [Structured\n",
"Data\n",
"Classification](/tutorial/structured_data_classification/#validation-data),\n",
"[Multi-task and Multiple Validation](/tutorial/multi/#validation-data).\n",
"\n",
"## Implement New Block\n",
"\n",
"You can extend the [Block](/base/#block-class)\n",
"class to implement your own building blocks and use it with\n",
"[AutoModel](/auto_model/#automodel-class).\n",
"\n",
"The first step is to learn how to write a build function for\n",
"[KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics). You\n",
"need to override the [build function](/base/#build-method) of the block. The\n",
"following example shows how to implement a single Dense layer block whose\n",
"number of neurons is tunable.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"class SingleDenseLayerBlock(ak.Block):\n",
" def build(self, hp, inputs=None):\n",
" # Get the input_node from inputs.\n",
" input_node = tree.flatten(inputs)[0]\n",
" layer = keras.layers.Dense(\n",
" hp.Int(\"num_units\", min_value=32, max_value=512, step=32)\n",
" )\n",
" output_node = layer(input_node)\n",
" return output_node"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can connect it with other blocks and build it into an\n",
"[AutoModel](/auto_model/#automodel-class).\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Build the AutoModel\n",
"input_node = ak.Input()\n",
"output_node = SingleDenseLayerBlock()(input_node)\n",
"output_node = ak.RegressionHead()(output_node)\n",
"auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1)\n",
"# Prepare Data\n",
"num_instances = 100\n",
"x_train = np.random.rand(num_instances, 20).astype(np.float32)\n",
"y_train = np.random.rand(num_instances, 1).astype(np.float32)\n",
"x_test = np.random.rand(num_instances, 20).astype(np.float32)\n",
"y_test = np.random.rand(num_instances, 1).astype(np.float32)\n",
"# Train the model\n",
"auto_model.fit(x_train, y_train, epochs=1)\n",
"print(auto_model.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"\n",
"[AutoModel](/auto_model/#automodel-class)\n",
"\n",
"**Nodes**:\n",
"[ImageInput](/node/#imageinput-class),\n",
"[Input](/node/#input-class),\n",
"[TextInput](/node/#textinput-class),\n",
"[StructuredDataInput](/node/#structureddatainput-class).\n",
"\n",
"**Preprocessors**:\n",
"[FeatureEngineering](/block/#featureengineering-class),\n",
"[ImageAugmentation](/block/#imageaugmentation-class),\n",
"[LightGBM](/block/#lightgbm-class),\n",
"[Normalization](/block/#normalization-class),\n",
"\n",
"**Blocks**:\n",
"[ConvBlock](/block/#convblock-class),\n",
"[DenseBlock](/block/#denseblock-class),\n",
"[Embedding](/block/#embedding-class),\n",
"[Merge](/block/#merge-class),\n",
"[ResNetBlock](/block/#resnetblock-class),\n",
"[RNNBlock](/block/#rnnblock-class),\n",
"[SpatialReduction](/block/#spatialreduction-class),\n",
"[TemporalReduction](/block/#temporalreduction-class),\n",
"[XceptionBlock](/block/#xceptionblock-class),\n",
"[ImageBlock](/block/#imageblock-class),\n",
"[TextBlock](/block/#textblock-class),\n",
"[StructuredDataBlock](/block/#structureddatablock-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "customized",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/export.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from keras.datasets import mnist\n",
"from keras.models import load_model\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can easily export the best model found by AutoKeras as a Keras\n",
"Model.\n",
"\n",
"The following example uses [ImageClassifier](/image_classifier) as an example.\n",
"All the tasks and the [AutoModel](/auto_model/#automodel-class) have this\n",
"[export_model](/auto_model/#export_model-method) function.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"\n",
"# Initialize the image classifier.\n",
"clf = ak.ImageClassifier(\n",
" overwrite=True, max_trials=1\n",
") # Try only 1 model.(Increase accordingly)\n",
"# Feed the image classifier with training data.\n",
"clf.fit(x_train, y_train, epochs=1) # Change no of epochs to improve the model\n",
"# Export as a Keras Model.\n",
"model = clf.export_model()\n",
"\n",
"print(type(model)) # \n",
"\n",
"model.save(\"model_autokeras.keras\")\n",
"\n",
"\n",
"loaded_model = load_model(\n",
" \"model_autokeras.keras\", custom_objects=ak.CUSTOM_OBJECTS\n",
")\n",
"\n",
"predicted_y = loaded_model.predict(np.expand_dims(x_test, -1))\n",
"print(predicted_y)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "export",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/image_classification.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from keras.datasets import mnist\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the MNIST dataset as an\n",
"example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"x_train = x_train[:100]\n",
"y_train = y_train[:100]\n",
"x_test = x_test[:100]\n",
"y_test = y_test[:100]\n",
"print(x_train.shape) # (100, 28, 28)\n",
"print(y_train.shape) # (100,)\n",
"print(y_train[:3]) # array([7, 2, 1], dtype=uint8)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the ImageClassifier.\n",
"It is recommended to have more trials for more complicated datasets.\n",
"This is just a quick demo of MNIST, so we set max_trials to 1.\n",
"For the same reason, we set epochs to 1.\n",
"You can also leave the epochs unspecified for an adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the image classifier.\n",
"clf = ak.ImageClassifier(overwrite=True, max_trials=1)\n",
"# Feed the image classifier with training data.\n",
"clf.fit(x_train, y_train, epochs=1)\n",
"\n",
"\n",
"# Predict with the best model.\n",
"predicted_y = clf.predict(x_test)\n",
"print(predicted_y)\n",
"\n",
"\n",
"# Evaluate the best model with testing data.\n",
"print(clf.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use validation_split to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set instead of splitting it from the\n",
"training data with validation_data.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 50000\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" epochs=1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Customized Search Space\n",
"For advanced users, you may customize your search space by using AutoModel\n",
"instead of ImageClassifier. You can configure the ImageBlock for some\n",
"high-level configurations, e.g., block_type for the type of neural network to\n",
"search, normalize for whether to do data normalization, augment for whether to\n",
"do data augmentation. You can also omit these arguments, which would\n",
"leave the different choices to be tuned automatically. See the following\n",
"example for detail.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.ImageInput()\n",
"output_node = ak.ImageBlock(\n",
" # Only search ResNet architectures.\n",
" block_type=\"resnet\",\n",
" # Normalize the dataset.\n",
" normalize=True,\n",
" # Do not do data augmentation.\n",
" augment=False,\n",
")(input_node)\n",
"output_node = ak.ClassificationHead()(output_node)\n",
"clf = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"clf.fit(x_train, y_train, epochs=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The usage of AutoModel is similar to the functional API of Keras. Basically, you\n",
"are building a graph, whose edges are blocks and the nodes are intermediate\n",
"outputs of blocks. To add an edge from input_node to output_node, use\n",
"output_node = ak.[some_block]([block_args])(input_node).\n",
"\n",
"You can also use more fine-grained blocks to customize the search space\n",
"even further. See the following example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.ImageInput()\n",
"output_node = ak.Normalization()(input_node)\n",
"output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n",
"output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n",
"output_node = ak.ClassificationHead()(output_node)\n",
"clf = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"clf.fit(x_train, y_train, epochs=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[ImageClassifier](/image_classifier),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[ImageBlock](/block/#imageblock-class),\n",
"[Normalization](/block/#normalization-class),\n",
"[ImageAugmentation](/block/#image-augmentation-class),\n",
"[ResNetBlock](/block/#resnetblock-class),\n",
"[ImageInput](/node/#imageinput-class),\n",
"[ClassificationHead](/block/#classificationhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "image_classification",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/image_regression.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from keras.datasets import mnist\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"To make this tutorial easy to follow, we just treat MNIST dataset as a\n",
"regression dataset. It means we will treat prediction targets of MNIST dataset,\n",
"which are integers ranging from 0 to 9 as numerical values, so that they can be\n",
"directly used as the regression targets.\n",
"\n",
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the MNIST dataset as an\n",
"example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"x_train = x_train[:100]\n",
"y_train = y_train[:100]\n",
"x_test = x_test[:100]\n",
"y_test = y_test[:100]\n",
"print(x_train.shape) # (100, 28, 28)\n",
"print(y_train.shape) # (100,)\n",
"print(y_train[:3]) # array([7, 2, 1], dtype=uint8)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the ImageRegressor. It is recommended to have more\n",
"trials for more complicated datasets. This is just a quick demo of MNIST, so\n",
"we set max_trials to 1. For the same reason, we set epochs to 1. You can also\n",
"leave the epochs unspecified for an adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the image regressor.\n",
"reg = ak.ImageRegressor(overwrite=True, max_trials=1)\n",
"# Feed the image regressor with training data.\n",
"reg.fit(x_train, y_train, epochs=1)\n",
"\n",
"\n",
"# Predict with the best model.\n",
"predicted_y = reg.predict(x_test)\n",
"print(predicted_y)\n",
"\n",
"\n",
"# Evaluate the best model with testing data.\n",
"print(reg.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use validation_split to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set instead of splitting it from the\n",
"training data with validation_data.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 50000\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" epochs=2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Customized Search Space\n",
"For advanced users, you may customize your search space by using AutoModel\n",
"instead of ImageRegressor. You can configure the ImageBlock for some high-level\n",
"configurations, e.g., block_type for the type of neural network to search,\n",
"normalize for whether to do data normalization, augment for whether to do data\n",
"augmentation. You can also omit these arguments, which would leave\n",
"the different choices to be tuned automatically. See the following example for\n",
"detail.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.ImageInput()\n",
"output_node = ak.ImageBlock(\n",
" # Only search ResNet architectures.\n",
" block_type=\"resnet\",\n",
" # Do not normalize the dataset.\n",
" normalize=False,\n",
" # Do not do data augmentation.\n",
" augment=False,\n",
")(input_node)\n",
"output_node = ak.RegressionHead()(output_node)\n",
"reg = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"reg.fit(x_train, y_train, epochs=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The usage of AutoModel is similar to the functional API of Keras. Basically,\n",
"you are building a graph, whose edges are blocks and the nodes are intermediate\n",
"outputs of blocks. To add an edge from input_node to output_node, use\n",
"output_node = ak.[some_block]([block_args])(input_node).\n",
"\n",
"You can also use more fine-grained blocks to customize the search space\n",
"even further. See the following example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.ImageInput()\n",
"output_node = ak.Normalization()(input_node)\n",
"output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n",
"output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n",
"output_node = ak.RegressionHead()(output_node)\n",
"reg = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"reg.fit(x_train, y_train, epochs=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[ImageRegressor](/image_regressor),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[ImageBlock](/block/#imageblock-class),\n",
"[Normalization](/block/#normalization-class),\n",
"[ImageAugmentation](/block/#image-augmentation-class),\n",
"[ResNetBlock](/block/#resnetblock-class),\n",
"[ImageInput](/node/#imageinput-class),\n",
"[RegressionHead](/block/#regressionhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "image_regression",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/multi.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"In this tutorial we are making use of the\n",
"[AutoModel](/auto_model/#automodel-class)\n",
" API to show how to handle multi-modal data and multi-task.\n",
"\n",
"## What is multi-modal?\n",
"\n",
"Multi-modal data means each data instance has multiple forms of information.\n",
"For example, a photo can be saved as an image. Besides the image, it may also\n",
"have when and where it was taken as its attributes, which can be represented as\n",
"numerical data.\n",
"\n",
"## What is multi-task?\n",
"\n",
"Multi-task here means that we want to predict multiple targets with the same\n",
"input features. For example, we not only want to classify an image according to\n",
"its content, but we also want to regress its quality as a float number between\n",
"0 and 1.\n",
"\n",
"The following diagram shows an example of multi-modal and multi-task neural\n",
"network model.\n",
"\n",
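"<div class=\"mermaid\">\n",
"graph TD\n",
"    id1(ImageInput) --> id3(Some Neural Network Model)\n",
"    id2(Input) --> id3\n",
"    id3 --> id4(ClassificationHead)\n",
"    id3 --> id5(RegressionHead)\n",
"</div>\n",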
"\n",
"It has two inputs: the images and the numerical input data. Each image is\n",
"associated with a set of attributes in the numerical input data. From these\n",
"data, we are trying to predict the classification label and the regression value\n",
"at the same time.\n",
"\n",
"## Data Preparation\n",
"\n",
"To illustrate our idea, we generate some random image and numerical data as\n",
"the multi-modal data.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"num_instances = 10\n",
"# Generate image data.\n",
"image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n",
"# Generate numerical data.\n",
"numerical_data = np.random.rand(num_instances, 20).astype(np.float32)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"We also generate some multi-task targets for classification and regression.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Generate regression targets.\n",
"regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n",
"# Generate classification labels of five classes.\n",
"classification_target = np.random.randint(5, size=num_instances)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Build and Train the Model\n",
"Then we initialize the multi-modal and multi-task model with\n",
"[AutoModel](/auto_model/#automodel-class).\n",
"Since this is just a demo, we use small values for `max_trials` and `epochs`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the multi with multiple inputs and outputs.\n",
"model = ak.AutoModel(\n",
" inputs=[ak.ImageInput(), ak.Input()],\n",
" outputs=[\n",
" ak.RegressionHead(metrics=[\"mae\"]),\n",
" ak.ClassificationHead(\n",
" loss=\"categorical_crossentropy\", metrics=[\"accuracy\"]\n",
" ),\n",
" ],\n",
" overwrite=True,\n",
" max_trials=2,\n",
")\n",
"# Fit the model with prepared data.\n",
"model.fit(\n",
" [image_data, numerical_data],\n",
" [regression_target, classification_target],\n",
" epochs=1,\n",
" batch_size=3,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data.\n",
"As shown in the example below, you can use `validation_split` to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model.fit(\n",
" [image_data, numerical_data],\n",
" [regression_target, classification_target],\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=1,\n",
" batch_size=3,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set\n",
"instead of splitting it from the training data with `validation_data`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 5\n",
"\n",
"image_val = image_data[split:]\n",
"numerical_val = numerical_data[split:]\n",
"regression_val = regression_target[split:]\n",
"classification_val = classification_target[split:]\n",
"\n",
"image_data = image_data[:split]\n",
"numerical_data = numerical_data[:split]\n",
"regression_target = regression_target[:split]\n",
"classification_target = classification_target[:split]\n",
"\n",
"model.fit(\n",
" [image_data, numerical_data],\n",
" [regression_target, classification_target],\n",
" # Use your own validation set.\n",
" validation_data=(\n",
" [image_val, numerical_val],\n",
" [regression_val, classification_val],\n",
" ),\n",
" epochs=1,\n",
" batch_size=3,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Customized Search Space\n",
"You can customize your search space.\n",
"The following figure shows the search space we want to define.\n",
"\n",
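"<div class=\"mermaid\">\n",
"graph LR\n",
"    id1(ImageInput) --> id2(Normalization)\n",
"    id2 --> id3(ImageAugmentation)\n",
"    id3 --> id4(ConvBlock)\n",
"    id3 --> id5(ResNetBlock V2)\n",
"    id4 --> id6(Merge)\n",
"    id5 --> id6\n",
"    id7(Input) --> id8(DenseBlock)\n",
"    id6 --> id9(Merge)\n",
"    id8 --> id9\n",
"    id9 --> id10(ClassificationHead)\n",
"    id9 --> id11(RegressionHead)\n",
"</div>\n"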
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node1 = ak.ImageInput()\n",
"output_node = ak.Normalization()(input_node1)\n",
"output_node = ak.ImageAugmentation()(output_node)\n",
"output_node1 = ak.ConvBlock()(output_node)\n",
"output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n",
"output_node1 = ak.Merge()([output_node1, output_node2])\n",
"\n",
"input_node2 = ak.Input()\n",
"output_node2 = ak.DenseBlock()(input_node2)\n",
"\n",
"output_node = ak.Merge()([output_node1, output_node2])\n",
"output_node1 = ak.ClassificationHead()(output_node)\n",
"output_node2 = ak.RegressionHead()(output_node)\n",
"\n",
"auto_model = ak.AutoModel(\n",
" inputs=[input_node1, input_node2],\n",
" outputs=[output_node1, output_node2],\n",
" overwrite=True,\n",
" max_trials=2,\n",
")\n",
"\n",
"image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n",
"numerical_data = np.random.rand(num_instances, 20).astype(np.float32)\n",
"regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n",
"classification_target = np.random.randint(5, size=num_instances)\n",
"\n",
"auto_model.fit(\n",
" [image_data, numerical_data],\n",
" [classification_target, regression_target],\n",
" batch_size=3,\n",
" epochs=1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[ImageInput](/node/#imageinput-class),\n",
"[Input](/node/#input-class),\n",
"[DenseBlock](/block/#denseblock-class),\n",
"[RegressionHead](/block/#regressionhead-class),\n",
"[ClassificationHead](/block/#classificationhead-class),\n",
"[CategoricalToNumerical](/block/#categoricaltonumerical-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "multi",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/structured_data_classification.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import keras\n",
"import pandas as pd\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the [Titanic\n",
"dataset](https://www.kaggle.com/c/titanic) as an example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"TRAIN_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/train.csv\"\n",
"TEST_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/eval.csv\"\n",
"\n",
"train_file_path = keras.utils.get_file(\"train.csv\", TRAIN_DATA_URL)\n",
"test_file_path = keras.utils.get_file(\"eval.csv\", TEST_DATA_URL)\n",
"\n",
"# Load data into numpy arrays\n",
"train_df = pd.read_csv(train_file_path)\n",
"test_df = pd.read_csv(test_file_path)\n",
"\n",
"y_train = train_df[\"survived\"].values\n",
"x_train = train_df.drop(\"survived\", axis=1).values\n",
"\n",
"y_test = test_df[\"survived\"].values\n",
"x_test = test_df.drop(\"survived\", axis=1).values"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the\n",
"[StructuredDataClassifier](/structured_data_classifier).\n",
"As a quick demo, we set epochs to 10.\n",
"You can also leave the epochs unspecified for an adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the structured data classifier.\n",
"clf = ak.StructuredDataClassifier(\n",
" overwrite=True, max_trials=3\n",
") # It tries 3 different models.\n",
"# Feed the structured data classifier with training data.\n",
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" epochs=10,\n",
")\n",
"# Predict with the best model.\n",
"predicted_y = clf.predict(x_test)\n",
"# Evaluate the best model with testing data.\n",
"print(clf.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also specify the column names and types for the data as follows. The\n",
"`column_names` is optional if the training data already have the column names,\n",
"e.g. pandas.DataFrame, CSV file. The type of any column whose type is not\n",
"specified will be inferred from the training data.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the structured data classifier.\n",
"clf = ak.StructuredDataClassifier(\n",
" column_names=[\n",
" \"sex\",\n",
" \"age\",\n",
" \"n_siblings_spouses\",\n",
" \"parch\",\n",
" \"fare\",\n",
" \"class\",\n",
" \"deck\",\n",
" \"embark_town\",\n",
" \"alone\",\n",
" ],\n",
" column_types={\"sex\": \"categorical\", \"fare\": \"numerical\"},\n",
" max_trials=10, # It tries 10 different models.\n",
" overwrite=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use `validation_split` to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=10,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set\n",
"instead of splitting it from the training data with `validation_data`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 500\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" epochs=10,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[StructuredDataClassifier](/structured_data_classifier),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[StructuredDataBlock](/block/#structureddatablock-class),\n",
"[DenseBlock](/block/#denseblock-class),\n",
"[StructuredDataInput](/node/#structureddatainput-class),\n",
"[ClassificationHead](/block/#classificationhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "structured_data_classification",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/structured_data_regression.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_california_housing\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the [California housing\n",
"dataset](\n",
"https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset)\n",
"as an example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"house_dataset = fetch_california_housing()\n",
"train_size = int(house_dataset.data.shape[0] * 0.9)\n",
"\n",
"x_train = house_dataset.data[:train_size]\n",
"y_train = house_dataset.target[:train_size]\n",
"x_test = house_dataset.data[train_size:]\n",
"y_test = house_dataset.target[train_size:]"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the\n",
"[StructuredDataRegressor](/structured_data_regressor).\n",
"As a quick demo, we set epochs to 10.\n",
"You can also leave the epochs unspecified for an adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the structured data regressor.\n",
"reg = ak.StructuredDataRegressor(\n",
" overwrite=True, max_trials=3\n",
") # It tries 3 different models.\n",
"# Feed the structured data regressor with training data.\n",
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" epochs=10,\n",
")\n",
"# Predict with the best model.\n",
"predicted_y = reg.predict(x_test)\n",
"# Evaluate the best model with testing data.\n",
"print(reg.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also specify the column names and types for the data as follows. The\n",
"`column_names` is optional if the training data already have the column names,\n",
"e.g. pandas.DataFrame, CSV file. The type of any column whose type is not\n",
"specified will be inferred from the training data.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the structured data regressor.\n",
"reg = ak.StructuredDataRegressor(\n",
" column_names=[\n",
" \"MedInc\",\n",
" \"HouseAge\",\n",
" \"AveRooms\",\n",
" \"AveBedrms\",\n",
" \"Population\",\n",
" \"AveOccup\",\n",
" \"Latitude\",\n",
" \"Longitude\",\n",
" ],\n",
" column_types={\"MedInc\": \"numerical\", \"Latitude\": \"numerical\"},\n",
" max_trials=10, # It tries 10 different models.\n",
" overwrite=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use `validation_split` to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=10,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set\n",
"instead of splitting it from the training data with `validation_data`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 500\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" epochs=10,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[StructuredDataRegressor](/structured_data_regressor),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[StructuredDataBlock](/block/#structureddatablock-class),\n",
"[DenseBlock](/block/#denseblock-class),\n",
"[StructuredDataInput](/node/#structureddatainput-class),\n",
"[RegressionHead](/block/#regressionhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "structured_data_regression",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/text_classification.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import keras\n",
"import numpy as np\n",
"from sklearn.datasets import load_files\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the [IMDB\n",
"dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n",
"as an example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"dataset = keras.utils.get_file(\n",
" fname=\"aclImdb.tar.gz\",\n",
" origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
" extract=True,\n",
")\n",
"\n",
"# set path to dataset\n",
"IMDB_DATADIR = os.path.join(\n",
" os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n",
")\n",
"\n",
"classes = [\"pos\", \"neg\"]\n",
"train_data = load_files(\n",
" os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n",
")\n",
"test_data = load_files(\n",
" os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n",
")\n",
"\n",
"x_train = np.array(train_data.data)[:100]\n",
"y_train = np.array(train_data.target)[:100]\n",
"x_test = np.array(test_data.data)[:100]\n",
"y_test = np.array(test_data.target)[:100]\n",
"\n",
"print(x_train.shape) # (100,)\n",
"print(y_train.shape) # (100,)\n",
"print(x_train[0][:50]) # this film was just brilliant casting"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the [TextClassifier](/text_classifier). As a quick\n",
"demo, we set epochs to 1. You can also leave the epochs unspecified for an\n",
"adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the text classifier.\n",
"clf = ak.TextClassifier(\n",
" overwrite=True, max_trials=1\n",
") # It only tries 1 model as a quick demo.\n",
"# Feed the text classifier with training data.\n",
"clf.fit(x_train, y_train, epochs=1, batch_size=2)\n",
"# Predict with the best model.\n",
"predicted_y = clf.predict(x_test)\n",
"# Evaluate the best model with testing data.\n",
"print(clf.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use `validation_split` to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
" epochs=1,\n",
" batch_size=2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set instead of splitting it from the\n",
"training data with `validation_data`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 5\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"clf.fit(\n",
" x_train,\n",
" y_train,\n",
" epochs=1,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" batch_size=2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Customized Search Space\n",
"For advanced users, you may customize your search space by using\n",
"[AutoModel](/auto_model/#automodel-class) instead of\n",
"[TextClassifier](/text_classifier). You can configure the\n",
"[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n",
"also leave these arguments unspecified, which would leave the different choices to\n",
"be tuned automatically. See the following example for detail.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.TextInput()\n",
"output_node = ak.TextBlock()(input_node)\n",
"output_node = ak.ClassificationHead()(output_node)\n",
"clf = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"clf.fit(x_train, y_train, epochs=1, batch_size=2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[TextClassifier](/text_classifier),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[ConvBlock](/block/#convblock-class),\n",
"[TextInput](/node/#textinput-class),\n",
"[ClassificationHead](/block/#classificationhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "text_classification",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/ipynb/text_regression.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"!export KERAS_BACKEND=\"torch\"\n",
"!pip install autokeras"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import keras\n",
"import numpy as np\n",
"from sklearn.datasets import load_files\n",
"\n",
"import autokeras as ak"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"To make this tutorial easy to follow, we simply treat the IMDB dataset as a\n",
"regression dataset. That is, we treat the prediction targets of the IMDB dataset,\n",
"which are 0s and 1s, as numerical values, so that they can be directly used as\n",
"the regression targets.\n",
"\n",
"## A Simple Example\n",
"The first step is to prepare your data. Here we use the [IMDB\n",
"dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n",
"as an example.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"dataset = keras.utils.get_file(\n",
" fname=\"aclImdb.tar.gz\",\n",
" origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
" extract=True,\n",
")\n",
"\n",
"# set path to dataset\n",
"IMDB_DATADIR = os.path.join(\n",
" os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n",
")\n",
"\n",
"classes = [\"pos\", \"neg\"]\n",
"train_data = load_files(\n",
" os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n",
")\n",
"test_data = load_files(\n",
" os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n",
")\n",
"\n",
"x_train = np.array(train_data.data)[:100]\n",
"y_train = np.array(train_data.target)[:100]\n",
"x_test = np.array(test_data.data)[:100]\n",
"y_test = np.array(test_data.target)[:100]\n",
"\n",
"print(x_train.shape) # (100,)\n",
"print(y_train.shape) # (100,)\n",
"print(x_train[0][:50]) # this film was just brilliant casting "
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"The second step is to run the [TextRegressor](/text_regressor). As a quick\n",
"demo, we set epochs to 1. You can also leave the epochs unspecified for an\n",
"adaptive number of epochs.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# Initialize the text regressor.\n",
"reg = ak.TextRegressor(\n",
" overwrite=True, max_trials=1 # It only tries 1 model as a quick demo.\n",
")\n",
"# Feed the text regressor with training data.\n",
"reg.fit(x_train, y_train, epochs=1, batch_size=2)\n",
"# Predict with the best model.\n",
"predicted_y = reg.predict(x_test)\n",
"# Evaluate the best model with testing data.\n",
"print(reg.evaluate(x_test, y_test))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Validation Data\n",
"By default, AutoKeras uses the last 20% of training data as validation data. As\n",
"shown in the example below, you can use `validation_split` to specify the\n",
"percentage.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" # Split the training data and use the last 15% as validation data.\n",
" validation_split=0.15,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"You can also use your own validation set instead of splitting it from the\n",
"training data with `validation_data`.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"split = 5\n",
"x_val = x_train[split:]\n",
"y_val = y_train[split:]\n",
"x_train = x_train[:split]\n",
"y_train = y_train[:split]\n",
"reg.fit(\n",
" x_train,\n",
" y_train,\n",
" epochs=1,\n",
" # Use your own validation set.\n",
" validation_data=(x_val, y_val),\n",
" batch_size=2,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Customized Search Space\n",
"For advanced users, you may customize your search space by using\n",
"[AutoModel](/auto_model/#automodel-class) instead of\n",
"[TextRegressor](/text_regressor). You can configure the\n",
"[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n",
"also leave these arguments unspecified, which would leave the different choices to\n",
"be tuned automatically. See the following example for detail.\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"input_node = ak.TextInput()\n",
"output_node = ak.TextBlock()(input_node)\n",
"output_node = ak.RegressionHead()(output_node)\n",
"reg = ak.AutoModel(\n",
" inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
")\n",
"reg.fit(x_train, y_train, epochs=1, batch_size=2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text"
},
"source": [
"## Reference\n",
"[TextRegressor](/text_regressor),\n",
"[AutoModel](/auto_model/#automodel-class),\n",
"[TextBlock](/block/#textblock-class),\n",
"[ConvBlock](/block/#convblock-class),\n",
"[TextInput](/node/#textinput-class),\n",
"[RegressionHead](/block/#regressionhead-class).\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "text_regression",
"private_outputs": false,
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
================================================
FILE: docs/keras_autodoc/__init__.py
================================================
from .autogen import DocumentationGenerator
from .gathering_members import get_classes
from .gathering_members import get_functions
from .gathering_members import get_methods
================================================
FILE: docs/keras_autodoc/autogen.py
================================================
import pathlib
import shutil
from inspect import getdoc
from inspect import isclass
from typing import Dict
from typing import List
from typing import Union
from typing import get_type_hints
from . import utils
from .docstring import process_docstring
from .examples import copy_examples
from .get_signatures import get_signature
class DocumentationGenerator:
    """Generates the documentation.

    # Arguments
        pages: A dictionary. The keys are the files' paths, the values
            are lists of strings, functions / classes / methods names
            with dotted access to the object. For example,
            `pages = {'my_file.md': ['keras.layers.Dense']}` is valid.
            When omitted, a fresh empty dictionary is used.
        project_url: The url pointing to the module directory of your
            project on GitHub. This will be used to make a `[Sources]`
            link. May also be a dictionary mapping a top-level module name
            to its url.
        template_dir: Where to put the markdown files which will be copied
            and filled in the destination directory. You should put files
            like `index.md` inside. If you want a markdown file to be filled
            with the docstring of a function, use the `{{autogenerated}}`
            tag inside, and then add the markdown file to the `pages`
            dictionary.
        examples_dir: Where you store examples in your project. Usually
            standalone files with a markdown docstring at the top. Will be
            inserted in the docs.
        extra_aliases: When displaying type hints, it's possible that the
            full dotted path is displayed instead of alias. The aliases
            present in `pages` are used, but it may happen if you're using a
            third-party library. For example
            `tensorflow.python.ops.variables.Variable` is displayed instead
            of `tensorflow.Variable`. Here you have two solutions: either
            you provide the import keras-autodoc should follow:
            `extra_aliases=["tensorflow.Variable"]`, or you provide a
            mapping to use
            `extra_aliases={"tensorflow.python.ops.variables.Variable":
            "tf.Variable"}`. The second option should be used if you want
            more control and that you don't want to respect the alias
            corresponding to the import (you can't do
            `import tf.Variable`). When giving a list, keras-autodoc will
            try to import the object from the string to understand what
            object you want to replace.
        max_signature_line_length: When displaying class and function
            signatures, keras-autodoc formats them using Black. This
            parameter controls the maximum line length of these signatures,
            and is passed directly through to Black.
        titles_size: `"#"` signs to put before a title in the generated
            markdown.
    """

    def __init__(
        self,
        pages: Dict[str, list] = None,
        project_url: Union[str, Dict[str, str]] = None,
        template_dir=None,
        examples_dir=None,
        extra_aliases: Union[List[str], Dict[str, str]] = None,
        max_signature_line_length: int = 110,
        titles_size="###",
    ):
        # A literal `{}` default would be one dict shared by every instance
        # created without `pages`; build a fresh one per instance instead.
        self.pages = {} if pages is None else pages
        self.project_url = project_url
        self.template_dir = template_dir
        self.examples_dir = examples_dir
        self.class_aliases = {}
        # Must run after `self.pages` is set: aliases are derived from it.
        self._fill_aliases(extra_aliases)
        self.max_signature_line_length = max_signature_line_length
        self.titles_size = titles_size

    def generate(self, dest_dir):
        """Generate the docs.

        Any existing `dest_dir` is deleted first.

        # Arguments
            dest_dir: Where to put the resulting markdown files.
        """
        dest_dir = pathlib.Path(dest_dir)
        print("Cleaning up existing sources directory.")
        if dest_dir.exists():
            shutil.rmtree(dest_dir)

        print("Populating sources directory with templates.")
        if self.template_dir:
            shutil.copytree(self.template_dir, dest_dir)

        for file_path, elements in self.pages.items():
            markdown_text = "".join(
                self._render(element) for element in elements
            )
            utils.insert_in_file(markdown_text, dest_dir / file_path)

        if self.examples_dir is not None:
            copy_examples(self.examples_dir, dest_dir / "examples")

    def process_docstring(self, docstring, types: dict = None):
        """Can be overridden."""
        processed = process_docstring(docstring, types, self.class_aliases)
        return processed

    def process_signature(self, signature):
        """Can be overridden."""
        return signature

    def _render(self, element):
        """Render one `pages` entry (dotted string or object) to markdown."""
        if isinstance(element, str):
            object_ = utils.import_object(element)
            if utils.ismethod(object_):
                # we remove the modules when displaying the methods
                signature_override = ".".join(element.split(".")[-2:])
            else:
                signature_override = element
        else:
            signature_override = None
            object_ = element
        return self._render_from_object(object_, signature_override)

    def _render_from_object(self, object_, signature_override: str):
        """Assemble source link, title, signature and docstring blocks."""
        subblocks = []
        if self.project_url is not None:
            subblocks.append(utils.make_source_link(object_, self.project_url))
        signature = get_signature(
            object_, signature_override, self.max_signature_line_length
        )
        signature = self.process_signature(signature)
        subblocks.append(f"{self.titles_size} {object_.__name__}\n")
        subblocks.append(utils.code_snippet(signature))

        docstring = getdoc(object_)
        if docstring:
            # For classes the argument types live on the constructor.
            if isclass(object_):
                type_hints = get_type_hints(object_.__init__)
            else:
                type_hints = get_type_hints(object_)
            docstring = self.process_docstring(docstring, type_hints)
            subblocks.append(docstring)
        return "\n\n".join(subblocks) + "\n\n----\n\n"

    def _fill_aliases(self, extra_aliases):
        """Populate `self.class_aliases` from `pages` and `extra_aliases`."""
        for list_elements in self.pages.values():
            for element_as_str in list_elements:
                element = utils.import_object(element_as_str)
                if not isclass(element):
                    continue
                true_dotted_path = utils.get_dotted_path(element)
                self.class_aliases[true_dotted_path] = element_as_str

        if isinstance(extra_aliases, dict):
            self.class_aliases.update(extra_aliases)
        elif isinstance(extra_aliases, list):
            for alias in extra_aliases:
                full_dotted_path = utils.get_dotted_path(
                    utils.import_object(alias)
                )
                self.class_aliases[full_dotted_path] = alias
================================================
FILE: docs/keras_autodoc/docstring.py
================================================
import itertools
import re
from sphinx.util.typing import stringify_annotation
from . import utils
def get_code_blocks(docstring):
    """Swap every triple-backtick snippet in `docstring` for a placeholder.

    Returns a `(code_blocks, docstring)` pair where `code_blocks` maps each
    placeholder token to the snippet it replaced, and `docstring` is the
    input text with the snippets substituted by their tokens.
    """
    fence = "```"
    code_blocks = {}
    remaining = docstring
    fence_at = remaining.find(fence)
    while fence_at != -1:
        remaining = remaining[fence_at:]
        # Offset of the closing fence is measured after the opening fence;
        # adding 6 accounts for the three backticks on each side.
        snippet_length = remaining[3:].find(fence) + 6
        snippet = remaining[:snippet_length]
        # Place marker in docstring for later reinjection.
        token = f"$KERAS_AUTODOC_CODE_BLOCK_{len(code_blocks)}"
        docstring = docstring.replace(snippet, token)
        code_blocks[token] = snippet
        remaining = remaining[snippet_length:]
        fence_at = remaining.find(fence)
    return code_blocks, docstring
def get_section_end(docstring, section_start):
    """Return the index in `docstring` where the section at `section_start` ends.

    The end is located with a regex looking for a non-whitespace character,
    one or more newlines, then either another non-whitespace character or
    the end of the text.
    """
    boundary = re.compile(r"\S\n+(\S|$)")
    found = boundary.search(docstring[section_start:])
    section_end = found.end() + section_start
    if section_end != len(docstring):
        # The match swallowed the first character of the following text and
        # the newline before it; step back over both.
        return section_end - 2
    return section_end
def get_google_style_sections_without_code(docstring):
    """Pull every `# Title` section out of a code-free docstring.

    Each section found is replaced in the text by a
    `KERAS_AUTODOC_GOOGLE_STYLE_SECTION_<n>` token. Returns the mapping
    `token -> section text` together with the tokenised docstring.
    """
    section_header = re.compile(r"\n# .+?\n")
    sections = {}
    found = section_header.search(docstring)
    while found is not None:
        # Skip the newline that precedes the "# Title" line.
        start = found.start() + 1
        end = get_section_end(docstring, start)
        token = f"KERAS_AUTODOC_GOOGLE_STYLE_SECTION_{len(sections)}"
        sections[token] = docstring[start:end]
        docstring = utils.insert_in_string(docstring, token, start, end)
        found = section_header.search(docstring)
    return sections, docstring
def get_google_style_sections(docstring):
    """Extract the `# Title` sections of a docstring that may contain code.

    Returns `(sections, docstring)`: the mapping from section token to
    section text, and the docstring with sections replaced by tokens. Code
    snippets are restored in both before returning.
    """
    # Code blocks are taken out first: the parsing is easier if the #, :
    # and other symbols they contain aren't there.
    code_blocks, code_free = get_code_blocks(docstring)
    sections, tokenised = get_google_style_sections_without_code(code_free)

    restored_sections = {
        token: reinject_strings(body, code_blocks)
        for token, body in sections.items()
    }
    return restored_sections, reinject_strings(tokenised, code_blocks)
def to_markdown(
    google_style_section: str, types: dict = None, aliases=None
) -> str:
    """Render one `# Title` section as markdown with a bold title.

    The bodies of "Arguments", "Attributes" and "Raises" sections are
    formatted as markdown lists; only "Arguments" receives the type hints
    and aliases.
    """
    newline_at = google_style_section.find("\n")
    # Drop the leading "# " of the title line.
    title = google_style_section[2:newline_at]
    body = google_style_section[newline_at + 1 :].strip()
    body = utils.remove_indentation(body)

    # it's a list of elements, a special formatting is applied.
    if title == "Arguments":
        body = format_as_markdown_list(body, types, aliases)
    elif title in ("Attributes", "Raises"):
        body = format_as_markdown_list(body)

    if not body:
        return f"__{title}__\n"
    return f"__{title}__\n\n{body}\n"
def format_as_markdown_list(
    section_body, types: dict = None, aliases: dict = None
):
    """Turn `name: description` entries into markdown bullet items.

    Every entry starting at column zero becomes `- __name__: ...`. When
    `types` is given, the stringified annotation of each listed argument is
    appended in backticks after its bold name.
    """
    bulleted = re.sub(r"\n([^ ].*?):", r"\n- __\1__:", section_body)
    bulleted = re.sub(r"^([^ ].*?):", r"- __\1__:", bulleted)

    # Optionally add type annotations to docstring
    if not types:
        return bulleted
    for arg, arg_type in types.items():
        hint = apply_aliases(stringify_annotation(arg_type), aliases)
        bulleted = re.sub(rf"(- __{arg}__)", rf"\1 `{hint}`", bulleted)
    return bulleted
def apply_aliases(string: str, aliases: dict):
    """Replace every dotted path found in `string` by its alias.

    # Arguments
        string: The text (typically a stringified type hint) to rewrite.
        aliases: Mapping `dotted path -> alias`. `None` or an empty mapping
            leaves `string` untouched.

    # Returns
        The rewritten string.
    """
    # Callers default `aliases` to None; treat that as "nothing to replace"
    # instead of failing on `None.items()`.
    if not aliases:
        return string
    for dotted_path, alias in aliases.items():
        string = string.replace(dotted_path, alias)
    return string
def reinject_strings(target, strings_to_inject):
    """Replace placeholder tokens in `target` by the strings they stand for.

    # Arguments
        target: The text containing placeholder tokens.
        strings_to_inject: Mapping `token -> replacement string`.

    # Returns
        `target` with every token replaced.
    """
    # Numbered tokens can be prefixes of one another ("..._1" vs "..._10").
    # Substituting the longest tokens first keeps a short token from
    # clobbering the head of a longer one.
    for token in sorted(strings_to_inject, key=len, reverse=True):
        target = target.replace(token, strings_to_inject[token])
    return target
def process_docstring(docstring, types: dict = None, aliases=None):
    """Convert the `# Title` sections of a docstring to markdown.

    # Arguments
        docstring: The raw docstring. An empty string is accepted.
        types: Optional mapping `argument name -> type hint`, forwarded to
            the section formatter.
        aliases: Optional mapping `dotted path -> alias`, forwarded to the
            section formatter.

    # Returns
        The docstring with each section replaced by its markdown rendering.
    """
    # `endswith` also covers the empty string, where indexing `[-1]` would
    # raise IndexError.
    if not docstring.endswith("\n"):
        docstring += "\n"

    google_style_sections, docstring = get_google_style_sections(docstring)
    # Section tokens are numbered, so "..._1" is a prefix of "..._10";
    # substitute the longest tokens first so none is partially replaced.
    for token in sorted(google_style_sections, key=len, reverse=True):
        markdown_section = to_markdown(
            google_style_sections[token], types, aliases
        )
        docstring = docstring.replace(token, markdown_section)
    return docstring
================================================
FILE: docs/keras_autodoc/examples.py
================================================
import os
import pathlib
def copy_examples(examples_dir, destination_dir):
    """Copy the examples directory in the documentation.

    Prettify files by extracting the docstrings written in Markdown: each
    `*.py` file in `examples_dir` becomes a `*.md` file in
    `destination_dir` made of the module docstring followed by the rest of
    the file inside a python code fence.

    # Arguments
        examples_dir: Directory holding the example scripts. May be a
            string or a path object.
        destination_dir: Directory receiving the markdown files; created
            (with parents) when missing.
    """
    pathlib.Path(destination_dir).mkdir(parents=True, exist_ok=True)
    for file in os.listdir(examples_dir):
        if not file.endswith(".py"):
            continue
        module_path = os.path.join(examples_dir, file)
        docstring, starting_line = get_module_docstring(module_path)
        # "name.py" -> "name.md"
        destination_file = os.path.join(destination_dir, file[:-2] + "md")
        # Reuse `module_path`: `examples_dir / file` only works when
        # `examples_dir` is a path object. Plain read/write modes suffice.
        with open(destination_file, "w", encoding="utf-8") as f_out, open(
            module_path, "r", encoding="utf-8"
        ) as f_in:
            f_out.write(docstring + "\n\n")

            # skip docstring; tolerate files shorter than expected
            for _ in range(starting_line):
                next(f_in, None)

            f_out.write("```python\n")
            # next line might be empty (or absent when the file ends here).
            line = next(f_in, "")
            if line != "\n":
                f_out.write(line)

            # copy the rest of the file.
            for line in f_in:
                f_out.write(line)
            f_out.write("```")
def get_module_docstring(filepath):
    """Extract the module docstring.

    Also finds the line at which the docstring ends.

    # Arguments
        filepath: Path of the Python source file to inspect.

    # Returns
        A tuple `(docstring, last_line)`. `docstring` is the raw module
        docstring (`""` when the module has none) and `last_line` is the
        1-based number of the line on which the docstring ends (`0` when
        there is no docstring, so nothing gets skipped by the caller).
    """
    import ast

    # Read through a context manager so the handle is always closed.
    with open(filepath, encoding="utf-8") as source_file:
        source = source_file.read()

    tree = ast.parse(source, filename=filepath)
    docstring = ast.get_docstring(tree, clean=False)
    if docstring is None:
        print("Could not get the docstring from " + filepath)
        return "", 0

    docstring_node = tree.body[0]
    # `end_lineno` exists on Python 3.8+.
    # NOTE(review): the fallback assumes older interpreters report a
    # multi-line string at its last line - confirm if 3.7 still matters.
    last_line = getattr(docstring_node, "end_lineno", None)
    if last_line is None:
        last_line = docstring_node.lineno
    return docstring, last_line
================================================
FILE: docs/keras_autodoc/gathering_members.py
================================================
import inspect
from inspect import isclass
from inspect import isfunction
from inspect import isroutine
from typing import List
from .utils import import_object
def get_classes(module, exclude: List[str] = None, return_strings: bool = True):
    """List the classes exposed by a module.

    # Arguments
        module: The module to fetch the classes from. If it's a string, it
            should be in the dotted format. `'keras.layers'` for example.
        exclude: The names which will be excluded from the returned list.
            For example,
            `get_classes('keras.layers', exclude=['Dense', 'Conv2D'])`.
        return_strings: If False, the actual classes will be returned. Note
            that if you use aliases when building your docs, you should use
            strings. This is because the computed signature uses
            `__name__` and `__module__` if you don't provide a string as
            input.

    # Returns
        A list of strings or a list of classes.
    """
    classes = _get_all_module_element(
        module, exclude, return_strings, class_=True
    )
    return classes
def get_functions(
    module, exclude: List[str] = None, return_strings: bool = True
):
    """List the functions exposed by a module.

    # Arguments
        module: The module to fetch the functions from. If it's a string,
            it should be in the dotted format. `'keras.backend'` for
            example.
        exclude: The names which will be excluded from the returned list.
            For example, `get_functions('keras.backend', exclude=['max'])`.
        return_strings: If False, the actual functions will be returned.
            Note that if you use aliases when building your docs, you
            should use strings. This is because the computed signature uses
            `__name__` and `__module__` if you don't provide a string as
            input.

    # Returns
        A list of strings or a list of functions.
    """
    functions = _get_all_module_element(
        module, exclude, return_strings, class_=False
    )
    return functions
def get_methods(cls, exclude=None, return_strings=True):
    """List the public methods of a class.

    Routines whose `__name__` starts with an underscore or appears in
    `exclude` are left out.

    # Arguments
        cls: The class to fetch the methods from. If it's a string, it
            should be in the dotted format. `'keras.layers.Dense'` for
            example.
        exclude: The names which will be excluded from the returned list.
            For example, `get_methods('keras.Model', exclude=['save'])`.
        return_strings: If False, the actual methods will be returned. Note
            that if you use aliases when building your docs, you should use
            strings. This is because the computed signature uses
            `__name__` and `__module__` if you don't provide a string as
            input.

    # Returns
        A list of strings or a list of methods.
    """
    if isinstance(cls, str):
        cls_str, cls = cls, import_object(cls)
    else:
        cls_str = f"{cls.__module__}.{cls.__name__}"

    excluded = exclude or []
    routines = (
        member for _, member in inspect.getmembers(cls, predicate=isroutine)
    )
    public = [
        member
        for member in routines
        if not (member.__name__[0] == "_" or member.__name__ in excluded)
    ]
    if return_strings:
        return [f"{cls_str}.{member.__name__}" for member in public]
    return public
def _get_all_module_element(module, exclude, return_strings, class_):
if isinstance(module, str):
module = import_object(module)
exclude = exclude or []
module_data = []
for name in dir(module):
module_member = getattr(module, name)
if not (isfunction(module_member) or isclass(module_member)):
continue
if name[0] == "_" or name in exclude:
continue
if module.__name__ not in module_member.__module__:
continue
if module_member in module_data:
continue
if class_ and not isclass(module_member):
continue
if not class_ and not isfunction(module_member):
continue
if return_strings:
module_data.append(f"{module.__name__}.{name}")
else:
module_data.append(module_member)
module_data.sort(key=id)
return module_data
================================================
FILE: docs/keras_autodoc/get_signatures.py
================================================
import inspect
import warnings
import black
from sphinx.util.inspect import signature
from sphinx.util.inspect import stringify_signature
from . import utils
def get_signature_start(function):
    """Return the qualified name that opens a signature.

    For the Dense layer, it should return the string 'keras.layers.Dense'.
    Methods are prefixed by their class name, other callables by their
    module name.
    """
    if utils.ismethod(function):
        owner = utils.get_class_from_method(function)
        prefix = owner.__name__ + "."
    else:
        try:
            prefix = function.__module__ + "."
        except AttributeError:
            warnings.warn(
                f"function {function} has no module. "
                f"It will not be included in the signature."
            )
            prefix = ""
    return prefix + function.__name__
def get_signature_end(function):
    """Return the parenthesised parameter list of `function` as a string.

    Annotations are left out, and a leading `self` parameter is removed
    when `function` is detected as a method.
    """
    rendered = stringify_signature(signature(function), show_annotation=False)
    if not utils.ismethod(function):
        return rendered
    return rendered.replace("(self, ", "(").replace("(self)", "()")
def get_function_signature(function, override=None, max_line_length: int = 110):
    """Build the formatted signature of a function or method.

    `override`, when given, replaces the computed qualified name at the
    start of the signature.
    """
    signature_start = (
        get_signature_start(function) if override is None else override
    )
    return format_signature(
        signature_start, get_signature_end(function), max_line_length
    )
def get_class_signature(cls, override=None, max_line_length: int = 110):
    """Build the formatted signature of a class from its `__init__`.

    `override`, when given, replaces the `module.ClassName` prefix.
    """
    signature_start = (
        f"{cls.__module__}.{cls.__name__}" if override is None else override
    )
    return format_signature(
        signature_start, get_signature_end(cls.__init__), max_line_length
    )
def get_signature(object_, override=None, max_line_length: int = 110):
    """Dispatch to the class or function signature builder for `object_`."""
    builder = (
        get_class_signature
        if inspect.isclass(object_)
        else get_function_signature
    )
    return builder(object_, override, max_line_length)
def format_signature(
    signature_start: str, signature_end: str, max_line_length: int = 110
):
    """pretty formatting to avoid long signatures on one single line"""
    # Build something that looks like a real function declaration, using a
    # placeholder name exactly as long as the real prefix.
    placeholder_name = "x" * len(signature_start)
    fake_source = f"def {placeholder_name}{signature_end}:\n pass\n"

    # Let black lay the declaration out within the requested line length.
    formatted = black.format_str(
        fake_source, mode=black.FileMode(line_length=max_line_length)
    )

    # Keep only the (possibly multiline) parameter list and put the real
    # prefix back in front of it.
    return signature_start + extract_signature_end(formatted)
def extract_signature_end(function_definition):
    """Return the text from the first "(" through the last ")" inclusive."""
    opening = function_definition.find("(")
    closing = function_definition.rfind(")") + 1
    return function_definition[opening:closing]
================================================
FILE: docs/keras_autodoc/utils.py
================================================
import importlib
import inspect
import os
import re
def count_leading_spaces(s):
    """Return the index of the first non-whitespace character of `s`.

    Returns 0 when `s` contains no non-whitespace character.
    """
    first_visible = re.search(r"\S", s)
    return first_visible.start() if first_visible else 0
def insert_in_file(markdown_text, file_path):
    """Save module page.

    Either insert content into existing page, or create page otherwise.

    # Arguments
        markdown_text: The generated markdown to write.
        file_path: A path object for the destination page. When the file
            already exists it is used as a template and its
            `{{autogenerated}}` tag is replaced by `markdown_text`.

    # Raises
        RuntimeError: If the existing template has no `{{autogenerated}}`
            tag.
    """
    if file_path.exists():
        template = file_path.read_text(encoding="utf-8")
        if "{{autogenerated}}" not in template:
            # The tag is kept in a plain (non-f) string: inside an f-string
            # the doubled braces would be rendered as single braces.
            raise RuntimeError(
                f"Template found for {file_path} but missing "
                "{{autogenerated}} tag."
            )
        markdown_text = template.replace("{{autogenerated}}", markdown_text)
        print("...inserting autogenerated content into template:", file_path)
    else:
        print("...creating new page with autogenerated content:", file_path)
    os.makedirs(file_path.parent, exist_ok=True)
    file_path.write_text(markdown_text, encoding="utf-8")
def code_snippet(snippet):
    """Wrap `snippet` in a markdown python code fence."""
    return "```python\n{}\n```\n".format(snippet)
def make_source_link(cls, project_url):
    """Build the markdown `[[source]]` link for an object.

    `project_url` is either the base url itself or a dictionary keyed by the
    first component of the object's `__module__`. The link points at
    `<project_url>/<module path>.py#L<line>`, where the line number comes
    from `inspect.getsourcelines`.
    """
    if isinstance(project_url, dict):
        # Select the url registered for the object's top-level package.
        base_module = cls.__module__.split(".")[0]
        project_url = project_url[base_module]
    # Dotted module name -> relative file path (without extension).
    path = cls.__module__.replace(".", "/")
    # getsourcelines returns (lines, starting line number); keep the number.
    line = inspect.getsourcelines(cls)[-1]
    # NOTE(review): the empty f-string fragments around the link contribute
    # nothing as written; they may originally have held wrapping markup -
    # confirm against the repository.
    return (
        f''
        f"[[source]]({project_url}/{path}.py#L{line})"
        f""
    )
def format_classes_list(classes, page_name):
    """Normalise `classes` in place to a list of `(class, methods)` pairs.

    Entries that are not a list or tuple are wrapped as `(entry, [])`. A
    `TypeError` is raised when the first item of a pair is not a class.
    The same (mutated) list is returned.
    """
    for position, entry in enumerate(classes):
        if not isinstance(entry, (list, tuple)):
            classes[position] = (entry, [])

    for class_, _methods in classes:
        if inspect.isclass(class_):
            continue
        # TODO: add a test for this
        raise TypeError(
            f"{class_} was given in the class list "
            f"of {page_name} but {class_} is not a Python class."
        )
    return classes
def get_class_from_method(meth):
    """Return the class that defines `meth`, or None when there is none.

    See
    https://stackoverflow.com/questions/3589311/
    get-defining-class-of-unbound-method-object-in-python-3/
    25959545#25959545

    Bound methods are looked up along the MRO of the object they are bound
    to; plain functions are resolved from their `__qualname__` on their
    module; anything else falls back to an `__objclass__` attribute.
    """
    if inspect.ismethod(meth):
        # Walk the MRO of the bound object's class looking for the class
        # whose own namespace holds this very object under its name.
        for cls in inspect.getmro(meth.__self__.__class__):
            if cls.__dict__.get(meth.__name__) is meth:
                return cls
        meth = meth.__func__  # fallback to __qualname__ parsing
    if inspect.isfunction(meth):
        # Derive a name from the qualified name and fetch it from the
        # function's module. No default is given to getattr, so a missing
        # attribute raises AttributeError.
        # NOTE(review): the referenced recipe strips the nested-function
        # "locals" marker of the qualname in the first split; the "."
        # separator seen here may be altered - confirm against the
        # repository.
        cls = getattr(
            inspect.getmodule(meth),
            meth.__qualname__.split(".", 1)[0].rsplit(".", 1)[0],
        )
        # Only accept the lookup result when it really is a class.
        if isinstance(cls, type):
            return cls
    return getattr(
        meth, "__objclass__", None
    )  # handle special descriptor objects
def ismethod(function):
    """Tell whether a defining class can be found for `function`."""
    owner = get_class_from_method(function)
    return owner is not None
def import_object(string: str):
    """Import an object from a string.

    The object can be a function, class or method.
    For example: `'keras.layers.Dense.get_weights'` is valid.

    Each dotted prefix is first tried as a module import; once that raises
    `ModuleNotFoundError`, the name is read as an attribute of the object
    obtained so far.
    """
    parts = string.split(".")
    current = None
    for depth, attribute in enumerate(parts, start=1):
        dotted_prefix = ".".join(parts[:depth])
        try:
            current = importlib.import_module(dotted_prefix)
        except ModuleNotFoundError:
            current = getattr(current, attribute)
    return current
def get_type(object_) -> str:
    """Classify ``object_`` as ``"class"``, ``"method"`` or ``"function"``.

    The checks run in that order, so a function for which a defining class
    can be found is reported as a method.

    Raises:
        TypeError: If ``object_`` is none of the three.
    """
    if inspect.isclass(object_):
        return "class"
    elif ismethod(object_):
        return "method"
    elif inspect.isfunction(object_):
        return "function"
    else:
        # The trailing space keeps "nor" and "a" apart once the two
        # fragments are concatenated.
        raise TypeError(
            f"{object_} is detected as neither a class, a method nor "
            f"a function."
        )
def insert_in_string(target, string_to_insert, start, end):
    """Replace ``target[start:end]`` with ``string_to_insert``.

    Returns the text before ``start``, the inserted string, and the text
    from ``end`` onwards, concatenated in that order.
    """
    return target[:start] + string_to_insert + target[end:]
def remove_indentation(string):
    """Strip one four-space indentation level from every line of ``string``.

    Lines indented deeper keep their remaining indentation; lines that are
    not indented are left untouched.
    """
    # The indentation unit must match the four-character slice removed from
    # the first line, otherwise the leading indent is never stripped.
    indent = " " * 4
    string = string.replace("\n" + indent, "\n")
    if string.startswith(indent):
        string = string[len(indent):]
    return string
def get_dotted_path(class_):
    """Return ``<module>.<qualified name>`` for ``class_``."""
    return "{}.{}".format(class_.__module__, class_.__qualname__)
================================================
FILE: docs/mkdocs.yml
================================================
# MkDocs configuration for the AutoKeras documentation site.
site_name: AutoKeras
theme:
  favicon: '/img/favicon.png'
  logo: '/img/logo_white.svg'
  name: 'material'
# Directory holding the Markdown sources of the site.
docs_dir: sources
repo_url: https://github.com/keras-team/autokeras
site_url: http://autokeras.com
# An empty edit_uri is set explicitly here.
edit_uri: ""
site_description: 'Documentation for AutoKeras.'
markdown_extensions:
  - codehilite
  - pymdownx.superfences:
      # Fenced blocks tagged "mermaid" are emitted with class "mermaid".
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_div_format
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - admonition
extra:
  analytics:
    provider: google
    property: G-GTF9QP8DFD
extra_css:
  - stylesheets/extra.css
# Mermaid script loaded on every page (pinned to 8.4.4); presumably what
# renders the "mermaid" fences declared above.
extra_javascript:
  - https://unpkg.com/mermaid@8.4.4/dist/mermaid.min.js
# Site navigation.
nav:
  - Home: index.md
  - Installation: install.md
  - Tutorials:
    - Overview: tutorial/overview.md
    - Image Classification: tutorial/image_classification.md
    - Image Regression: tutorial/image_regression.md
    - Text Classification: tutorial/text_classification.md
    - Text Regression: tutorial/text_regression.md
    - Structured Data Classification: tutorial/structured_data_classification.md
    - Structured Data Regression: tutorial/structured_data_regression.md
    - Multi-Modal and Multi-Task: tutorial/multi.md
    - Customized Model: tutorial/customized.md
    - Export Model: tutorial/export.md
    - Load Data from Disk: tutorial/load.md
    - FAQ: tutorial/faq.md
  - Contributing Guide: contributing.md
  - Documentation:
    - ImageClassifier: image_classifier.md
    - ImageRegressor: image_regressor.md
    - TextClassifier: text_classifier.md
    - TextRegressor: text_regressor.md
    - StructuredDataClassifier: structured_data_classifier.md
    - StructuredDataRegressor: structured_data_regressor.md
    - AutoModel: auto_model.md
    - Base Class: base.md
    - Node: node.md
    - Block: block.md
    - Utils: utils.md
  - About: about.md
================================================
FILE: docs/py/customized.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import keras
import numpy as np
import tree
from keras.datasets import mnist
import autokeras as ak
"""
In this tutorial, we show how to customize your search space with
[AutoModel](/auto_model/#automodel-class) and how to implement your own block
as search space. This API is mainly for advanced users who already know what
their model should look like.
## Customized Search Space
First, let us see how we can build the following neural network using the
building blocks in AutoKeras.
We can make use of the [AutoModel](/auto_model/#automodel-class) API in
AutoKeras to implement it as follows.
The usage is the same as the [Keras functional
API](https://keras.io/api/models/model/#with-the-functional-api).
Since this is just a demo, we use a small number of `max_trials` and `epochs`.
"""
# Image input followed by normalization; the normalized output feeds both
# branches below.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
# Two parallel branches over the same normalized input.
output_node1 = ak.ConvBlock()(output_node)
output_node2 = ak.ResNetBlock(version="v2")(output_node)
# Merge the branches and attach the classification head.
output_node = ak.Merge()([output_node1, output_node2])
output_node = ak.ClassificationHead()(output_node)
# max_trials=1 keeps the demo search to a single trial.
auto_model = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
"""
While building the model, the blocks used need to follow this topology:
`Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation`
are `Preprocessor`s.
`ClassificationHead` is `Head`. The rest are `Block`s.
In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version
of ResNet to use. There are many other arguments to specify for each building
block. For most of the arguments, if not specified, they would be tuned
automatically. Please refer to the documentation links at the bottom of the
page for more details.
Then, we prepare some data to run the model.
"""
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)
# Feed the AutoModel with training data.
# Only the first 100 training samples are passed in to keep the demo short.
auto_model.fit(x_train[:100], y_train[:100], epochs=1)
# Predict with the best model.
predicted_y = auto_model.predict(x_test)
# Evaluate the best model with testing data.
print(auto_model.evaluate(x_test, y_test))
"""
For multiple input nodes and multiple heads search space, you can refer to
[this section](/tutorial/multi/#customized-search-space).
## Validation Data
If you would like to provide your own validation data or change the ratio of
the validation data, please refer to the Validation Data section of the
tutorials of [Image
Classification](/tutorial/image_classification/#validation-data), [Text
Classification](/tutorial/text_classification/#validation-data), [Structured
Data
Classification](/tutorial/structured_data_classification/#validation-data),
[Multi-task and Multiple Validation](/tutorial/multi/#validation-data).
## Implement New Block
You can extend the [Block](/base/#block-class)
class to implement your own building blocks and use it with
[AutoModel](/auto_model/#automodel-class).
The first step is to learn how to write a build function for
[KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics). You
need to override the [build function](/base/#build-method) of the block. The
following example shows how to implement a single Dense layer block whose
number of neurons is tunable.
"""
class SingleDenseLayerBlock(ak.Block):
    """Custom block made of a single Dense layer with a tunable width."""

    def build(self, hp, inputs=None):
        # Only the first node of the (possibly nested) inputs is used.
        first_input = tree.flatten(inputs)[0]
        # The number of units is searched between 32 and 512 in steps of 32.
        units = hp.Int("num_units", min_value=32, max_value=512, step=32)
        return keras.layers.Dense(units)(first_input)
"""
You can connect it with other blocks and build it into an
[AutoModel](/auto_model/#automodel-class).
"""
# Build the AutoModel
# The custom block sits between a generic input and a regression head.
input_node = ak.Input()
output_node = SingleDenseLayerBlock()(input_node)
output_node = ak.RegressionHead()(output_node)
auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1)
# Prepare Data
# Random float32 features (20 per instance) and one random target each.
num_instances = 100
x_train = np.random.rand(num_instances, 20).astype(np.float32)
y_train = np.random.rand(num_instances, 1).astype(np.float32)
x_test = np.random.rand(num_instances, 20).astype(np.float32)
y_test = np.random.rand(num_instances, 1).astype(np.float32)
# Train the model
auto_model.fit(x_train, y_train, epochs=1)
# Evaluate the best model on the held-out random data.
print(auto_model.evaluate(x_test, y_test))
"""
## Reference
[AutoModel](/auto_model/#automodel-class)
**Nodes**:
[ImageInput](/node/#imageinput-class),
[Input](/node/#input-class),
[TextInput](/node/#textinput-class),
[StructuredDataInput](/node/#structureddatainput-class).
**Preprocessors**:
[FeatureEngineering](/block/#featureengineering-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[LightGBM](/block/#lightgbm-class),
[Normalization](/block/#normalization-class).
**Blocks**:
[ConvBlock](/block/#convblock-class),
[DenseBlock](/block/#denseblock-class),
[Embedding](/block/#embedding-class),
[Merge](/block/#merge-class),
[ResNetBlock](/block/#resnetblock-class),
[RNNBlock](/block/#rnnblock-class),
[SpatialReduction](/block/#spatialreduction-class),
[TemporalReduction](/block/#temporalreduction-class),
[XceptionBlock](/block/#xceptionblock-class),
[ImageBlock](/block/#imageblock-class),
[TextBlock](/block/#textblock-class),
[StructuredDataBlock](/block/#structureddatablock-class).
"""
================================================
FILE: docs/py/export.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import numpy as np
from keras.datasets import mnist
from keras.models import load_model
import autokeras as ak
"""
You can easily export the best model found by AutoKeras as a Keras
Model.
The following example uses [ImageClassifier](/image_classifier) as an example.
All the tasks and the [AutoModel](/auto_model/#automodel-class) have this
[export_model](/auto_model/#export_model-method) function.
"""
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Initialize the image classifier.
clf = ak.ImageClassifier(
    overwrite=True, max_trials=1
)  # Try only 1 model.(Increase accordingly)
# Feed the image classifier with training data.
clf.fit(x_train, y_train, epochs=1)  # Change no of epochs to improve the model
# Export as a Keras Model.
model = clf.export_model()
print(type(model))  # Prints the class of the exported model object.
# Save the exported model to disk and load it back, passing the AutoKeras
# custom objects to load_model.
model.save("model_autokeras.keras")
loaded_model = load_model(
    "model_autokeras.keras", custom_objects=ak.CUSTOM_OBJECTS
)
# A trailing axis is appended to x_test before predicting.
# NOTE(review): presumably the channel axis the exported model expects --
# confirm against the model's input shape.
predicted_y = loaded_model.predict(np.expand_dims(x_test, -1))
print(predicted_y)
================================================
FILE: docs/py/image_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
from keras.datasets import mnist
import autokeras as ak
"""
## A Simple Example
The first step is to prepare your data. Here we use the MNIST dataset as an
example
"""
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of each split so the demo runs quickly.
x_train = x_train[:100]
y_train = y_train[:100]
x_test = x_test[:100]
y_test = y_test[:100]
print(x_train.shape)  # (100, 28, 28)
print(y_train.shape)  # (100,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)
"""
The second step is to run the ImageClassifier.
It is recommended to have more trials for more complicated datasets.
This is just a quick demo of MNIST, so we set max_trials to 1.
For the same reason, we set epochs to 1.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""
# Initialize the image classifier.
# max_trials=1 limits the search to a single trial for this demo.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)
# Feed the image classifier with training data.
clf.fit(x_train, y_train, epochs=1)
# Predict with the best model.
predicted_y = clf.predict(x_test)
print(predicted_y)
# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""
clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
)
"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""
# x_train holds only 100 samples at this point, so a fixed index such as
# 50000 would leave the validation set empty. Hold out the last 20% instead.
split = int(len(x_train) * 0.8)
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=1,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using AutoModel
instead of ImageClassifier. You can configure the ImageBlock for some
high-level configurations, e.g., block_type for the type of neural network to
search, normalize for whether to do data normalization, augment for whether to
do data augmentation. You can also leave these arguments unspecified, which
would leave the different choices to be tuned automatically. See the following
example for detail.
"""
# Search space: image input -> ImageBlock (fixed choices below) -> head.
input_node = ak.ImageInput()
output_node = ak.ImageBlock(
    # Only search ResNet architectures.
    block_type="resnet",
    # Normalize the dataset.
    normalize=True,
    # Do not do data augmentation.
    augment=False,
)(input_node)
output_node = ak.ClassificationHead()(output_node)
# max_trials=1 limits the search to a single trial for this demo.
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1)
"""
The usage of AutoModel is similar to the functional API of Keras. Basically, you
are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node, use
output_node = ak.[some_block]([block_args])(input_node).
You can also use more fine-grained blocks to customize the search space
even further. See the following example.
"""
# Chain individual blocks: normalization, augmentation (horizontal flips
# disabled), a v2 ResNet block, then the classification head.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.ClassificationHead()(output_node)
# max_trials=1 limits the search to a single trial for this demo.
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1)
"""
## Reference
[ImageClassifier](/image_classifier),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/block/#normalization-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[ClassificationHead](/block/#classificationhead-class).
"""
================================================
FILE: docs/py/image_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
from keras.datasets import mnist
import autokeras as ak
"""
To make this tutorial easy to follow, we just treat MNIST dataset as a
regression dataset. It means we will treat prediction targets of MNIST dataset,
which are integers ranging from 0 to 9 as numerical values, so that they can be
directly used as the regression targets.
## A Simple Example
The first step is to prepare your data. Here we use the MNIST dataset as an
example
"""
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of each split so the demo runs quickly.
x_train = x_train[:100]
y_train = y_train[:100]
x_test = x_test[:100]
y_test = y_test[:100]
print(x_train.shape)  # (100, 28, 28)
print(y_train.shape)  # (100,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)
"""
The second step is to run the ImageRegressor. It is recommended to have more
trials for more complicated datasets. This is just a quick demo of MNIST, so
we set max_trials to 1. For the same reason, we set epochs to 1. You can also
leave the epochs unspecified for an adaptive number of epochs.
"""
# Initialize the image regressor.
# max_trials=1 limits the search to a single trial for this demo.
reg = ak.ImageRegressor(overwrite=True, max_trials=1)
# Feed the image regressor with training data.
reg.fit(x_train, y_train, epochs=1)
# Predict with the best model.
predicted_y = reg.predict(x_test)
print(predicted_y)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""
reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
)
"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""
# x_train holds only 100 samples at this point, so a fixed index such as
# 50000 would leave the validation set empty. Hold out the last 20% instead.
split = int(len(x_train) * 0.8)
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=2,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using AutoModel
instead of ImageRegressor. You can configure the ImageBlock for some high-level
configurations, e.g., block_type for the type of neural network to search,
normalize for whether to do data normalization, augment for whether to do data
augmentation. You can also leave these arguments unspecified, which would leave
the different choices to be tuned automatically. See the following example for
detail.
"""
# Search space: image input -> ImageBlock (fixed choices below) -> head.
input_node = ak.ImageInput()
output_node = ak.ImageBlock(
    # Only search ResNet architectures.
    block_type="resnet",
    # Do not normalize the dataset.
    normalize=False,
    # Do not do data augmentation.
    augment=False,
)(input_node)
output_node = ak.RegressionHead()(output_node)
# max_trials=1 limits the search to a single trial for this demo.
reg = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1)
"""
The usage of AutoModel is similar to the functional API of Keras. Basically,
you are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node, use
output_node = ak.[some_block]([block_args])(input_node).
You can also use more fine-grained blocks to customize the search space
even further. See the following example.
"""
# Chain individual blocks: normalization, augmentation (horizontal flips
# disabled), a v2 ResNet block, then the regression head.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.RegressionHead()(output_node)
# max_trials=1 limits the search to a single trial for this demo.
reg = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1)
"""
## Reference
[ImageRegressor](/image_regressor),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/block/#normalization-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[RegressionHead](/block/#regressionhead-class).
"""
================================================
FILE: docs/py/multi.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import numpy as np
import autokeras as ak
"""
In this tutorial we are making use of the
[AutoModel](/auto_model/#automodel-class)
API to show how to handle multi-modal data and multi-task.
## What is multi-modal?
Multi-modal data means each data instance has multiple forms of information.
For example, a photo can be saved as an image. Besides the image, it may also
have when and where it was taken as its attributes, which can be represented as
numerical data.
## What is multi-task?
By multi-task, we mean that we want to predict multiple targets with the same
input features. For example, we not only want to classify an image according to
its content, but we also want to regress its quality as a float number between
0 and 1.
The following diagram shows an example of multi-modal and multi-task neural
network model.
It has two inputs: the images and the numerical input data. Each image is
associated with a set of attributes in the numerical input data. From these
data, we are trying to predict the classification label and the regression value
at the same time.
## Data Preparation
To illustrate our idea, we generate some random image and numerical data as
the multi-modal data.
"""
# Number of random samples generated for every input and target below.
num_instances = 10
# Generate image data.
image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)
# Generate numerical data.
numerical_data = np.random.rand(num_instances, 20).astype(np.float32)
"""
We also generate some multi-task targets for classification and regression.
"""
# Generate regression targets.
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
# Generate classification labels of five classes.
classification_target = np.random.randint(5, size=num_instances)
"""
## Build and Train the Model
Then we initialize the multi-modal and multi-task model with
[AutoModel](/auto_model/#automodel-class).
Since this is just a demo, we use a small number of `max_trials` and `epochs`.
"""
# Initialize the multi-modal, multi-task model with multiple inputs and
# outputs.
model = ak.AutoModel(
    inputs=[ak.ImageInput(), ak.Input()],
    outputs=[
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ],
    overwrite=True,
    max_trials=2,
)
# Fit the model with prepared data.
# Inputs and targets are listed in the same order as the inputs and outputs
# given to AutoModel above.
model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    epochs=1,
    batch_size=3,
)
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.
As shown in the example below, you can use `validation_split` to specify the
percentage.
"""
model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
    batch_size=3,
)
"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""
# Samples from index `split` onwards become the validation set; the samples
# before it remain as training data.
split = 5
image_val = image_data[split:]
numerical_val = numerical_data[split:]
regression_val = regression_target[split:]
classification_val = classification_target[split:]
image_data = image_data[:split]
numerical_data = numerical_data[:split]
regression_target = regression_target[:split]
classification_target = classification_target[:split]
model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    # Use your own validation set.
    validation_data=(
        [image_val, numerical_val],
        [regression_val, classification_val],
    ),
    epochs=1,
    batch_size=3,
)
"""
## Customized Search Space
You can customize your search space.
The following figure shows the search space we want to define.
"""
# Image branch: normalization and augmentation, then a ConvBlock and a v2
# ResNetBlock in parallel, merged into one node.
input_node1 = ak.ImageInput()
output_node = ak.Normalization()(input_node1)
output_node = ak.ImageAugmentation()(output_node)
output_node1 = ak.ConvBlock()(output_node)
output_node2 = ak.ResNetBlock(version="v2")(output_node)
output_node1 = ak.Merge()([output_node1, output_node2])
# Numerical branch: a DenseBlock over the second input.
input_node2 = ak.Input()
output_node2 = ak.DenseBlock()(input_node2)
# Merge both branches and attach one head per task.
output_node = ak.Merge()([output_node1, output_node2])
output_node1 = ak.ClassificationHead()(output_node)
output_node2 = ak.RegressionHead()(output_node)
auto_model = ak.AutoModel(
    inputs=[input_node1, input_node2],
    outputs=[output_node1, output_node2],
    overwrite=True,
    max_trials=2,
)
# Regenerate the random data, since the arrays above were cut down to the
# first `split` samples.
image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)
numerical_data = np.random.rand(num_instances, 20).astype(np.float32)
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
classification_target = np.random.randint(5, size=num_instances)
# Targets follow the order of `outputs`: classification first, then
# regression.
auto_model.fit(
    [image_data, numerical_data],
    [classification_target, regression_target],
    batch_size=3,
    epochs=1,
)
"""
## Reference
[AutoModel](/auto_model/#automodel-class),
[ImageInput](/node/#imageinput-class),
[Input](/node/#input-class),
[DenseBlock](/block/#denseblock-class),
[RegressionHead](/block/#regressionhead-class),
[ClassificationHead](/block/#classificationhead-class),
[CategoricalToNumerical](/block/#categoricaltonumerical-class).
"""
================================================
FILE: docs/py/structured_data_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import keras
import pandas as pd
import autokeras as ak
"""
## A Simple Example
The first step is to prepare your data. Here we use the [Titanic
dataset](https://www.kaggle.com/c/titanic) as an example.
"""
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
# Fetch both CSV files and keep the local paths returned by get_file.
train_file_path = keras.utils.get_file("train.csv", TRAIN_DATA_URL)
test_file_path = keras.utils.get_file("eval.csv", TEST_DATA_URL)
# Load data into numpy arrays
train_df = pd.read_csv(train_file_path)
test_df = pd.read_csv(test_file_path)
# The "survived" column is the target; all remaining columns are features.
y_train = train_df["survived"].values
x_train = train_df.drop("survived", axis=1).values
y_test = test_df["survived"].values
x_test = test_df.drop("survived", axis=1).values
"""
The second step is to run the
[StructuredDataClassifier](/structured_data_classifier).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""
# Initialize the structured data classifier.
clf = ak.StructuredDataClassifier(
    overwrite=True, max_trials=3
)  # It tries 3 different models.
# Feed the structured data classifier with training data.
clf.fit(
    x_train,
    y_train,
    epochs=10,
)
# Predict with the best model.
predicted_y = clf.predict(x_test)
# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))
"""
You can also specify the column names and types for the data as follows. The
`column_names` is optional if the training data already have the column names,
e.g. pandas.DataFrame, CSV file. Any column whose type is not specified will
be inferred from the training data.
"""
# Initialize the structured data classifier.
# Column names are given explicitly; types are given for "sex" and "fare"
# only.
clf = ak.StructuredDataClassifier(
    column_names=[
        "sex",
        "age",
        "n_siblings_spouses",
        "parch",
        "fare",
        "class",
        "deck",
        "embark_town",
        "alone",
    ],
    column_types={"sex": "categorical", "fare": "numerical"},
    max_trials=10,  # It tries 10 different models.
    overwrite=True,
)
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""
clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=10,
)
"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""
# Rows from index `split` onwards become the validation set; the rows
# before it remain as training data.
split = 500
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=10,
)
"""
## Reference
[StructuredDataClassifier](/structured_data_classifier),
[AutoModel](/auto_model/#automodel-class),
[StructuredDataBlock](/block/#structureddatablock-class),
[DenseBlock](/block/#denseblock-class),
[StructuredDataInput](/node/#structureddatainput-class),
[ClassificationHead](/block/#classificationhead-class).
"""
================================================
FILE: docs/py/structured_data_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
from sklearn.datasets import fetch_california_housing
import autokeras as ak
"""
## A Simple Example
The first step is to prepare your data. Here we use the [California housing
dataset](
https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset)
as an example.
"""
house_dataset = fetch_california_housing()
# Use the first 90% of the rows for training and the rest for testing.
train_size = int(house_dataset.data.shape[0] * 0.9)
x_train = house_dataset.data[:train_size]
y_train = house_dataset.target[:train_size]
x_test = house_dataset.data[train_size:]
y_test = house_dataset.target[train_size:]
"""
The second step is to run the
[StructuredDataRegressor](/structured_data_regressor).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""
# Initialize the structured data regressor.
reg = ak.StructuredDataRegressor(
    overwrite=True, max_trials=3
)  # It tries 3 different models.
# Feed the structured data regressor with training data.
reg.fit(
    x_train,
    y_train,
    epochs=10,
)
# Predict with the best model.
predicted_y = reg.predict(x_test)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))
"""
You can also specify the column names and types for the data as follows. The
`column_names` is optional if the training data already have the column names,
e.g. pandas.DataFrame, CSV file. Any column whose type is not specified will
be inferred from the training data.
"""
# Initialize the structured data regressor.
# Column names are given explicitly; types are given for "MedInc" and
# "Latitude" only.
reg = ak.StructuredDataRegressor(
    column_names=[
        "MedInc",
        "HouseAge",
        "AveRooms",
        "AveBedrms",
        "Population",
        "AveOccup",
        "Latitude",
        "Longitude",
    ],
    column_types={"MedInc": "numerical", "Latitude": "numerical"},
    max_trials=10,  # It tries 10 different models.
    overwrite=True,
)
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""
reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=10,
)
"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""
# Rows from index `split` onwards become the validation set; the rows
# before it remain as training data.
split = 500
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=10,
)
"""
## Reference
[StructuredDataRegressor](/structured_data_regressor),
[AutoModel](/auto_model/#automodel-class),
[StructuredDataBlock](/block/#structureddatablock-class),
[DenseBlock](/block/#denseblock-class),
[StructuredDataInput](/node/#structureddatainput-class),
[RegressionHead](/block/#regressionhead-class).
"""
================================================
FILE: docs/py/text_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import os
import keras
import numpy as np
from sklearn.datasets import load_files
import autokeras as ak
"""
## A Simple Example
The first step is to prepare your data. Here we use the [IMDB
dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)
as an example.
"""
# Download the IMDB archive; extract=True asks get_file to unpack it.
dataset = keras.utils.get_file(
    fname="aclImdb.tar.gz",
    origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True,
)
# set path to dataset
IMDB_DATADIR = os.path.join(
    os.path.dirname(dataset), "aclImdb_extracted", "aclImdb"
)
# Only these two sub-folders are loaded as categories.
classes = ["pos", "neg"]
train_data = load_files(
    os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes
)
test_data = load_files(
    os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes
)
# Keep only the first 100 samples of each split so the demo runs quickly.
x_train = np.array(train_data.data)[:100]
y_train = np.array(train_data.target)[:100]
x_test = np.array(test_data.data)[:100]
y_test = np.array(test_data.target)[:100]
print(x_train.shape)  # (100,)
print(y_train.shape)  # (100,)
print(x_train[0][:50])  # Start of the first training review.
"""
The second step is to run the [TextClassifier](/text_classifier). As a quick
demo, we set epochs to 1. You can also leave the epochs unspecified for an
adaptive number of epochs.
"""
# Initialize the text classifier.
clf = ak.TextClassifier(
    overwrite=True, max_trials=1
)  # It only tries 1 model as a quick demo.
# Feed the text classifier with training data.
clf.fit(x_train, y_train, epochs=1, batch_size=2)
# Predict with the best model.
predicted_y = clf.predict(x_test)
# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""
clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
    batch_size=2,
)
"""
You can also use your own validation set instead of splitting it from the
training data with `validation_data`.
"""
# Samples from index `split` onwards become the validation set; only the
# first `split` samples remain as training data.
split = 5
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    epochs=1,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    batch_size=2,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[TextClassifier](/text_classifier). You can configure the
[TextBlock](/block/#textblock-class) for some high-level configurations. You can
also leave these arguments unspecified, which would leave the different choices
to be tuned automatically. See the following example for detail.
"""
# Search space: text input -> TextBlock (default arguments) -> head.
input_node = ak.TextInput()
output_node = ak.TextBlock()(input_node)
output_node = ak.ClassificationHead()(output_node)
# max_trials=1 limits the search to a single trial for this demo.
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1, batch_size=2)
"""
## Reference
[TextClassifier](/text_classifier),
[AutoModel](/auto_model/#automodel-class),
[ConvBlock](/block/#convblock-class),
[TextInput](/node/#textinput-class),
[ClassificationHead](/block/#classificationhead-class).
"""
================================================
FILE: docs/py/text_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""
import os
import keras
import numpy as np
from sklearn.datasets import load_files
import autokeras as ak
"""
To make this tutorial easy to follow, we just treat IMDB dataset as a
regression dataset. It means we will treat prediction targets of IMDB dataset,
which are 0s and 1s as numerical values, so that they can be directly used as
the regression targets.
## A Simple Example
The first step is to prepare your data. Here we use the [IMDB
dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)
as an example.
"""
dataset = keras.utils.get_file(
fname="aclImdb.tar.gz",
origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
extract=True,
)
# set path to dataset
IMDB_DATADIR = os.path.join(
os.path.dirname(dataset), "aclImdb_extracted", "aclImdb"
)
classes = ["pos", "neg"]
train_data = load_files(
os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes
)
test_data = load_files(
os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes
)
# Keep only the first 100 samples of each split so the demo runs quickly.
x_train = np.array(train_data.data)[:100]
y_train = np.array(train_data.target)[:100]
x_test = np.array(test_data.data)[:100]
y_test = np.array(test_data.target)[:100]
print(x_train.shape) # (100,) after the slicing above
print(y_train.shape) # (100,) assuming 1-D targets from load_files
print(x_train[0][:50]) # first 50 characters of the first review
"""
The second step is to run the [TextRegressor](/text_regressor). As a quick
demo, we set epochs to 1. You can also leave the epochs unspecified for an
adaptive number of epochs.
"""
# Initialize the text regressor.
reg = ak.TextRegressor(
overwrite=True, max_trials=1 # It only tries 1 model as a quick demo.
)
# Feed the text regressor with training data.
reg.fit(x_train, y_train, epochs=1, batch_size=2)
# Predict with the best model.
predicted_y = reg.predict(x_test)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))
"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""
# Bound the run with the same epochs/batch_size as every other fit call in
# this tutorial; without them the search uses an adaptive number of epochs,
# which is far too slow for a quick demo.
reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
    batch_size=2,
)
"""
You can also use your own validation set instead of splitting it from the
training data with `validation_data`.
"""
split = 5
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
x_train,
y_train,
epochs=1,
# Use your own validation set.
validation_data=(x_val, y_val),
batch_size=2,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[TextRegressor](/text_regressor). You can configure the
[TextBlock](/block/#textblock-class) for some high-level configurations. You can
also leave these arguments unspecified, which would leave the different choices
to be tuned automatically. See the following example for detail.
"""
input_node = ak.TextInput()
output_node = ak.TextBlock()(input_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(
inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1, batch_size=2)
"""
## Reference
[TextRegressor](/text_regressor),
[AutoModel](/auto_model/#automodel-class),
[TextBlock](/block/#textblock-class),
[ConvBlock](/block/#convblock-class),
[TextInput](/node/#textinput-class),
[RegressionHead](/block/#regressionhead-class).
"""
================================================
FILE: docs/requirements.txt
================================================
mkdocs
mkdocs-material
pygments
jupyter
pymdown-extensions
Sphinx
markdown
black
================================================
FILE: docs/run_py_files.sh
================================================
#!/bin/bash
# Run every tutorial script under py/ and record which ones succeed or fail.
# Results are written to success.txt and failure.txt in the current directory.

# Without nullglob an unmatched pattern stays literal, and "py/*.py" itself
# would be executed and logged as a failure.
shopt -s nullglob

SUCCESS_FILE="success.txt"
FAILURE_FILE="failure.txt"

# Clear the files
: > "$SUCCESS_FILE"
: > "$FAILURE_FILE"

for file in py/*.py; do
    # Script output is discarded; only the exit status matters here.
    if python3 "$file" > /dev/null 2>&1; then
        echo "$file" >> "$SUCCESS_FILE"
    else
        echo "$file" >> "$FAILURE_FILE"
    fi
done
================================================
FILE: docs/templates/about.md
================================================
This package is developed by [DATA LAB](http://faculty.cs.tamu.edu/xiahu/) at Texas A&M University,
collaborating with [keras-team](https://github.com/keras-team) for version 1.0 and above.
## Core Team
[**Haifeng Jin**](https://github.com/haifeng-jin):
Created, designed and implemented the AutoKeras system.
Maintainer.
[**François Chollet**](https://github.com/fchollet):
The API and system architecture design for AutoKeras 1.0.
Code reviews for pull requests.
[**Qingquan Song**](https://github.com/song3134):
Designed the neural architecture search algorithms.
Implemented the tabular data classification and regression module.
[**Xia "Ben" Hu**](http://faculty.cs.tamu.edu/xiahu/):
Project lead and maintainer.
================================================
FILE: docs/templates/index.md
================================================
#
##
{{autogenerated}}
================================================
FILE: docs/templates/install.md
================================================
## Requirements
**Python 3**: Follow the TensorFlow install steps to install Python 3.
**Pip**: Follow the TensorFlow install steps to install Pip.
**PyTorch >= 2.3.0**: AutoKeras is based on Keras. We recommend using the
PyTorch backend.
Please follow [this page](https://pytorch.org/get-started/locally/) to install
PyTorch.
## Install AutoKeras
AutoKeras only supports **Python 3**.
If you followed previous steps to use virtualenv to install tensorflow,
you can just activate the virtualenv and use the following command to install AutoKeras.
```
pip install git+https://github.com/keras-team/keras-tuner.git
pip install autokeras
```
If you did not use virtualenv, and you use `python3` command to execute your python program,
please use the following command to install AutoKeras.
```
python3 -m pip install git+https://github.com/keras-team/keras-tuner.git
python3 -m pip install autokeras
```
================================================
FILE: docs/templates/stylesheets/extra.css
================================================
/* Set the theme's primary and accent foreground colour variables to the same red. */
:root>* {
--md-primary-fg-color: #d00000;
--md-accent-fg-color: #d00000;
}
================================================
FILE: docs/templates/tutorial/faq.md
================================================
## How to resume a previously killed run?
This feature is controlled by the `overwrite` argument of `AutoModel` or any other task APIs.
It is set to `False` by default,
which means it would not overwrite the contents of the directory.
In other words, it will continue the previous fit.
You can just run the same code again.
It will automatically resume the previously killed run.
## How to customize metrics and loss?
Please see the code example below.
```python
import autokeras as ak
clf = ak.ImageClassifier(
max_trials=3,
metrics=['mse'],
loss='mse',
)
```
## How to use customized metrics to select the best model?
By default, AutoKeras uses validation loss as the metric for selecting the best model.
Below is a code example of using customized metric for selecting models.
Please read the comments for the details.
```python
# Implement your customized metric according to the tutorial.
# https://keras.io/api/metrics/#creating-custom-metrics
import autokeras as ak
import keras_tuner


def f1_score(y_true, y_pred):
    ...


clf = ak.ImageClassifier(
    max_trials=3,
    # Wrap the function into a Keras Tuner Objective
    # and pass it to AutoKeras.
    # Direction can be 'min' or 'max'
    # meaning we want to minimize or maximize the metric.
    # 'val_f1_score' is the function name or the metric name
    # with a 'val_' prefix added.
    objective=keras_tuner.Objective('val_f1_score', direction='max'),
    # Include it as one of the metrics.
    metrics=[f1_score],
)
```
================================================
FILE: docs/templates/tutorial/overview.md
================================================
# AutoKeras 1.0 Tutorial
## Supported Tasks
AutoKeras supports several tasks with an extremely simple interface.
You can click the links below to see the detailed tutorial for each task.
**Supported Tasks**:
[Image Classification](/tutorial/image_classification)
[Image Regression](/tutorial/image_regression)
[Text Classification](/tutorial/text_classification)
[Text Regression](/tutorial/text_regression)
[Structured Data Classification](/tutorial/structured_data_classification)
[Structured Data Regression](/tutorial/structured_data_regression)
## Multi-Task and Multi-Modal Data
If you are dealing with multi-task or multi-modal dataset, you can refer to this
[tutorial](/tutorial/multi) for details.
## Customized Model
Follow this [tutorial](/tutorial/customized) to use AutoKeras building blocks to quickly construct your own model.
With these blocks, you only need to specify the high-level architecture of your model.
AutoKeras would search for the best detailed configuration for you.
Moreover, you can override the base classes to create your own block.
The following are the links to the documentation of the predefined input nodes and blocks in AutoKeras.
**Nodes**:
[ImageInput](/node/#imageinput-class)
[Input](/node/#input-class)
[TextInput](/node/#textinput-class)
[StructuredDataInput](/node/#structureddatainput-class)
**Blocks**:
[ImageAugmentation](/block/#imageaugmentation-class)
[Normalization](/block/#normalization-class)
[ConvBlock](/block/#convblock-class)
[DenseBlock](/block/#denseblock-class)
[Embedding](/block/#embedding-class)
[Merge](/block/#merge-class)
[ResNetBlock](/block/#resnetblock-class)
[RNNBlock](/block/#rnnblock-class)
[SpatialReduction](/block/#spatialreduction-class)
[TemporalReduction](/block/#temporalreduction-class)
[XceptionBlock](/block/#xceptionblock-class)
[ImageBlock](/block/#imageblock-class)
[TextBlock](/block/#textblock-class)
[StructuredDataBlock](/block/#structureddatablock-class)
[ClassificationHead](/block/#classificationhead-class)
[RegressionHead](/block/#regressionhead-class)
## Export Model
You can follow this [tutorial](/tutorial/export) to export the best model.
================================================
FILE: docs/tutobooks.py
================================================
"""Keras tutobooks implementation.
A tutobook is a tutorial available simultaneously as a notebook,
as a Python script, and as a nicely rendered webpage.
Its source-of-truth (for manual edition and version control) is
its Python script form, but you can also create one by starting
from a notebook and converting it with the command `nb2py`.
Text cells are stored in markdown-formatted comment blocks.
The first line (starting with `" * 3`, i.e. three double quotes) may optionally contain a special
annotation, one of:
- invisible: do not render this block.
- shell: execute this block while prefixing each line with `!`.
The script form should start with a header with the following fields:
Title:
Author: (could be `Authors`: as well, and may contain markdown links)
Date created: (date in yyyy/mm/dd format)
Last modified: (date in yyyy/mm/dd format)
Description: (one-line text description)
## How to add a new code example to Keras.io
You would typically start from an existing notebook.
Save it to disk (let's say as `path_to_your_nb.ipynb`).
`cd` to the `keras-io/scripts/` directory.
Then run:
```
python tutobooks.py nb2py path_to_your_nb.ipynb ../examples/your_example.py
```
This will create the file `examples/your_example.py`. Open it,
fill in the headers, and generally edit it so that it looks nice.
NOTE THAT THE CONVERSION SCRIPT MAY MAKE MISTAKES IN ITS ATTEMPTS
TO SHORTEN LINES. MAKE SURE TO PROOFREAD THE GENERATED .py IN FULL.
Or alternatively, make sure to keep your lines reasonably-sized (<90 char)
to start with, so that the script won't have to shorten them.
You can then preview what it looks like when converted back again
to ipynb by running:
```
python tutobooks.py py2nb ../examples/your_example.py preview.ipynb
```
NOTE THAT THIS COMMAND WILL ERROR OUT IF ANY CELL TAKES TOO LONG
TO EXECUTE. In that case, make your code lighter/faster.
Remember that examples are meant to demonstrate workflows, not
train state-of-the-art models. They should
stay very lightweight.
Open the generated `preview.ipynb` and make sure it looks like what
you expect. If not, keep editing `your_example.py` until it does.
Finally, submit a PR adding `examples/your_example.py`.
"""
import json
import os
import random
import shutil
import sys
from pathlib import Path
# Value passed to nbconvert's `--ExecutePreprocessor.timeout` option when
# py_to_nb executes a generated notebook (60 * 60 = 3600).
TIMEOUT = 60 * 60
# py_to_nb raises ValueError when the code cells of a script contain more
# than this many counted lines of code (see _count_locs).
MAX_LOC = 300
def nb_to_py(nb_path, py_path):
    """Convert a Jupyter notebook into a tutobook Python script.

    Code cells become plain code, cells whose first character is `!` become
    `shell`-tagged string blocks, and markdown cells become triple-quoted
    string blocks. A placeholder header is written first. The result is
    formatted with `black` and its long lines are then shortened.

    Args:
        nb_path: Path of the source `.ipynb` file.
        py_path: Path of the `.py` file to create (str or path-like).
    """
    with open(nb_path) as f:
        nb = json.load(f)
    py = '"""\n'
    py += "Title: FILLME\n"
    py += "Author: FILLME\n"
    py += "Date created: FILLME\n"
    py += "Last modified: FILLME\n"
    py += "Description: FILLME\n"
    py += '"""\n'
    for cell in nb["cells"]:
        if cell["cell_type"] == "code":
            # Is it a shell cell?
            if (
                cell["source"]
                and cell["source"][0]
                and cell["source"][0][0] == "!"
            ):
                # It's a shell cell
                py += '"""shell\n'
                py += "".join(cell["source"]) + "\n"
                py += '"""\n\n'
            else:
                # It's a Python cell
                py += "".join(cell["source"]) + "\n\n"
        elif cell["cell_type"] == "markdown":
            py += '"""\n'
            py += "".join(cell["source"]) + "\n"
            py += '"""\n\n'
    # Save file
    with open(py_path, "w") as f:
        f.write(py)
    # Format file with Black (str() so that path-like objects work too).
    os.system("black " + str(py_path))
    # Shorten lines
    with open(py_path) as f:
        py = f.read()
    try:
        py = _shorten_lines(py)
    finally:
        # Always write something back: if shortening failed, `py` still holds
        # the black-formatted text read above.
        with open(py_path, "w") as f:
            f.write(py)
def py_to_nb(py_path, nb_path, fill_outputs=True):
    """Convert a tutobook Python script into a Jupyter notebook.

    The script is split into elements with `_get_next_script_element`; each
    non-empty element becomes a markdown or code cell. `shell`-tagged blocks
    become code cells whose lines are prefixed with `!`, and `invisible`
    blocks are dropped.

    Args:
        py_path: Path of the source `.py` script.
        nb_path: Path of the `.ipynb` file to write.
        fill_outputs: When True, execute the notebook in place with
            `jupyter nbconvert` and remove any files or folders the
            execution created next to the notebook.

    Raises:
        ValueError: If the code cells contain more than `MAX_LOC` counted
            lines of code.
    """
    import copy

    with open(py_path) as f:
        py = f.read()
    # Header validation and the header cell are not used here; `validate`
    # and `_parse_header` are kept in this module but not called.
    cells = []
    loc = 0
    while py:
        e, cell_type, py, tag = _get_next_script_element(py)
        lines = e.split("\n")
        if all(line == "" for line in lines):
            continue
        if lines and not lines[0]:
            lines = lines[1:]
        source = [line + "\n" for line in lines]
        # Drop last newline char
        if source and not source[-1].strip():
            source = source[:-1]
        if tag == "shell":
            source = ["!" + line for line in source]
            cell_type = "code"
        if tag != "invisible" and source:
            cell = {"cell_type": cell_type, "source": source}
            if cell_type == "code":
                cell["outputs"] = []
                cell["metadata"] = {"colab_type": "code"}
                cell["execution_count"] = 0
                loc += _count_locs(source)
            else:
                cell["metadata"] = {"colab_type": "text"}
            cells.append(cell)
    # Deep-copy the template: a shallow copy would share the nested
    # "metadata" dict, so setting the name below would modify NB_BASE itself.
    notebook = copy.deepcopy(NB_BASE)
    notebook["metadata"]["colab"]["name"] = str(py_path).split("/")[-1][:-3]
    notebook["cells"] = cells
    if loc > MAX_LOC:
        raise ValueError(
            "Found %d lines of code, but expected fewer than %d"
            % (loc, MAX_LOC)
        )
    with open(nb_path, "w") as f:
        f.write(json.dumps(notebook, indent=1, sort_keys=True))
    if fill_outputs:
        print("Generating ipynb")
        parent_dir = Path(nb_path).parent
        # Snapshot the directory so that anything the notebook creates while
        # running can be removed afterwards.
        current_files = os.listdir(parent_dir)
        try:
            os.system(
                "jupyter nbconvert --to notebook --execute --debug "
                + str(nb_path)
                + " --inplace"
                + " --ExecutePreprocessor.timeout="
                + str(TIMEOUT)
            )
        finally:
            new_files = os.listdir(parent_dir)
            for fname in new_files:
                if fname not in current_files:
                    fpath = parent_dir / fname
                    if os.path.isdir(fpath):
                        print("Removing created folder:", fname)
                        shutil.rmtree(fpath)
                    else:
                        print("Removing created file:", fname)
                        os.remove(fpath)
def nb_to_md(nb_path, md_path, img_dir, working_dir=None):
    """Convert an already populated notebook into a markdown page.

    The notebook is copied into a working directory and converted there with
    `jupyter nbconvert --to markdown`. Images nbconvert writes to
    `<md_name>_files` are copied to `<img_dir>/<md_name>`, image links in the
    markdown are repointed accordingly, and indented output is wrapped by
    `_make_output_code_blocks` before the page is written to `md_path`.

    Args:
        nb_path: Path of the source notebook.
        md_path: Path of the markdown file to write; must end with `.md`.
        img_dir: Directory that receives the extracted images.
        working_dir: Directory to run the conversion in. When None, a
            temporary `tmp_<number>` directory is created and removed again
            at the end.
    """
    img_exts = ("png", "jpg", "jpeg")
    # Assumes an already populated notebook.
    assert str(md_path).endswith(".md")
    current_dir = os.getcwd()
    original_img_dir = str(img_dir)
    if original_img_dir.endswith("/"):
        original_img_dir = original_img_dir[:-1]
    # Make paths absolute before changing directory below.
    img_dir = os.path.abspath(img_dir)
    nb_path = os.path.abspath(nb_path)
    nb_fname = str(nb_path).split("/")[-1]
    del_working_dir = False
    if working_dir is None:
        del_working_dir = True
        # Integer bounds: random.randint rejects floats such as 1e6 on
        # Python 3.12 and later.
        working_dir = "tmp_" + str(random.randint(10**6, 10**7))
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    print("Using working_dir:", working_dir)
    md_name = str(md_path).split("/")[-1][:-3]
    target_md = md_name + ".md"
    os.chdir(working_dir)
    try:
        shutil.copyfile(nb_path, nb_fname)
        img_dir = Path(img_dir) / md_name
        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        os.system(
            # "jupyter nbconvert --to markdown --execute --debug "
            "jupyter nbconvert --to markdown "
            + nb_fname
            + " --output "
            + target_md
            # + " --ExecutePreprocessor.timeout="
            # + str(TIMEOUT)
        )
        tmp_img_dir = md_name + "_files"
        if os.path.exists(tmp_img_dir):
            for fname in os.listdir(tmp_img_dir):
                if fname.endswith(img_exts):
                    src = Path(tmp_img_dir) / fname
                    target = Path(img_dir) / fname
                    print("copy", src, "to", target)
                    shutil.copyfile(src, target)
    finally:
        # Restore the caller's working directory even if conversion failed.
        os.chdir(current_dir)
    with open(Path(working_dir) / (md_name + ".md")) as f:
        md_content = f.read()
    # nbconvert links images relative to `<md_name>_files`; point the links
    # at the directory the images were copied to instead.
    for ext in img_exts:
        md_content = md_content.replace(
            "![" + ext + "](" + md_name + "_files",
            "![" + ext + "](" + original_img_dir + "/" + md_name,
        )
    md_content = _make_output_code_blocks(md_content)
    with open(md_path, "w") as f:
        f.write(md_content)
    if del_working_dir:
        shutil.rmtree(working_dir)
def py_to_md(py_path, nb_path, md_path, img_dir, working_dir=None):
    """Render a tutobook script as markdown via an intermediate notebook.

    Args:
        py_path: Path of the source `.py` script.
        nb_path: Path where the intermediate notebook is written.
        md_path: Path of the markdown file to produce.
        img_dir: Directory that receives images extracted from the notebook.
        working_dir: Optional directory for the conversion, forwarded to
            `nb_to_md`.
    """
    # Step 1: script -> notebook, without executing the cells.
    py_to_nb(py_path, nb_path, False)
    # Step 2: notebook -> markdown.
    nb_to_md(nb_path, md_path, img_dir, working_dir)
def validate(py):
    """Validate the format of a tutobook script.

    Specifically:
        - validate headers
        - validate style with black

    Args:
        py: Full text of the script.

    Raises:
        ValueError: If the header is missing, truncated or malformed, if a
            single-line triple-quoted comment or a trailing space is found,
            or if `black` would reformat the script. Line numbers in the
            messages are 1-based.
    """
    import tempfile

    lines = py.split("\n")

    def header_line(index):
        # A truncated script yields "" so that the matching "Missing ..."
        # error is raised instead of an IndexError.
        return lines[index] if index < len(lines) else ""

    if not header_line(0).startswith('"""'):
        raise ValueError('Missing `"""`-fenced header at top of script.')
    if not header_line(1).startswith("Title: "):
        raise ValueError("Missing `Title:` field.")
    if not header_line(2).startswith("Author: ") and not header_line(
        2
    ).startswith("Authors: "):
        raise ValueError("Missing `Author:` field.")
    if not header_line(3).startswith("Date created: "):
        raise ValueError("Missing `Date created:` field.")
    if not header_line(4).startswith("Last modified: "):
        raise ValueError("Missing `Last modified:` field.")
    if not header_line(5).startswith("Description: "):
        raise ValueError("Missing `Description:` field.")
    description = header_line(5)[len("Description: ") :]
    if not description:
        raise ValueError("Missing `Description:` field content.")
    if not description[0] == description[0].upper():
        raise ValueError("Description field content must be capitalized.")
    if not description[-1] == ".":
        raise ValueError("Description field content must end with a period.")
    if len(description) > 100:
        raise ValueError(
            "Description field content must be less than 100 chars."
        )
    for i, line in enumerate(lines, start=1):
        if line.startswith('"""') and line.endswith('"""') and len(line) > 3:
            raise ValueError(
                'Do not use single line `"""`-fenced comments. '
                "Encountered at line %d" % (i,)
            )
    for i, line in enumerate(lines, start=1):
        if line.endswith(" "):
            raise ValueError(
                "Found trailing space on line %d; line: `%s`" % (i, line)
            )
    # Validate style with black: format a temporary copy and compare it
    # with the input.
    pre_formatting = "\n".join(lines)
    fd, fpath = tempfile.mkstemp(suffix=".py")
    try:
        with os.fdopen(fd, "w") as f:
            f.write(pre_formatting)
        os.system("black " + fpath)
        with open(fpath) as f:
            formatted = f.read()
    finally:
        os.remove(fpath)
    if formatted != pre_formatting:
        raise ValueError(
            "Your python file did not follow `black` conventions. "
            "Run `black your_file.py` to autoformat it."
        )
def _count_locs(lines):
loc = 0
string_open = False
for line in lines:
line = line.strip()
if not line or line.startswith("#"):
continue
if not string_open:
if not line.startswith('"""'):
loc += 1
else:
if not line.endswith('"""'):
string_open = True
else:
if line.startswith('"""'):
string_open = False
return loc
def _shorten_lines(py):
max_len = 90
lines = []
for line in py.split("\n"):
if len(line) <= max_len:
lines.append(line)
continue
i = 0
while len(line) > max_len:
line = line.lstrip()
if " " not in line[1:]:
lines.append(line)
break
else:
short_line = line[:max_len]
line = line[max_len:]
if " " in short_line:
reversed_short_line = short_line[::-1]
index = reversed_short_line.find(" ") + 1
line = short_line[-index:] + line
short_line = short_line[:-index]
lines.append(short_line.lstrip())
i += 1
if i > 10:
raise
lines.append(line.lstrip())
return "\n".join(lines)
def _get_next_script_element(py):
lines = py.split("\n")
assert lines
elines = []
i = 0
tag = None
if lines[0].startswith('"""'):
assert len(lines) >= 2
etype = "markdown"
if len(lines[0]) > 3:
tag = lines[0][3:]
if tag not in ["shell", "invisible"]:
raise ValueError("Found unknown cell tag:", tag)
lines = lines[1:]
else:
etype = "code"
for i, line in enumerate(lines):
if line.startswith('"""'):
break
else:
elines.append(line)
if etype == "markdown":
py = "\n".join(lines[i + 1 :])
else:
py = "\n".join(lines[i:])
e = "\n".join(elines)
return e, etype, py, tag
def _parse_header(header):
lines = header.split("\n")
title = lines[0][len("Title: ") :]
author_line = lines[1]
if author_line.startswith("Authors"):
author = author_line[len("Authors: ") :]
auth_field = "Authors"
else:
author = author_line[len("Author: ") :]
auth_field = "Author"
date_created = lines[2][len("Date created: ") :]
last_modified = lines[3][len("Last modified: ") :]
description = lines[4][len("Description: ") :]
return {
"title": title,
"author": author,
"auth_field": auth_field,
"date_created": date_created,
"last_modified": last_modified,
"description": description,
}
def _make_output_code_blocks(md):
lines = md.split("\n")
output_lines = []
final_lines = []
is_inside_backticks = False
def is_output_line(line, prev_line, output_lines):
if line.startswith(" ") and len(line) >= 5:
if output_lines or (lines[i - 1].strip() == "" and line.strip()):
return True
return False
def flush(output_lines, final_lines):
final_lines.append('
')
final_lines.append("```")
if len(output_lines) == 1:
line = output_lines[0]
final_lines.append(line[4:])
else:
for line in output_lines:
final_lines.append(line[4:])
final_lines.append("```")
final_lines.append("
")
for i, line in enumerate(lines):
if line.startswith("```"):
is_inside_backticks = not is_inside_backticks
final_lines.append(line)
continue
if is_inside_backticks:
final_lines.append(line)
continue
if i > 0 and is_output_line(line, lines[-1], output_lines):
output_lines.append(line)
elif not line:
if output_lines:
if output_lines[-1]:
output_lines.append(line)
else:
final_lines.append(line)
else:
if output_lines:
flush(output_lines, final_lines)
output_lines = []
final_lines.append(line)
if output_lines:
flush(output_lines, final_lines)
return "\n".join(final_lines)
# Skeleton copied into every generated notebook: Colab, kernel and language
# metadata plus the notebook format version. The "cells" entry and the Colab
# "name" are filled in by py_to_nb.
NB_BASE = dict(
    metadata=dict(
        colab=dict(
            collapsed_sections=[],
            name="",  # FILL ME
            private_outputs=False,
            provenance=[],
            toc_visible=True,
        ),
        kernelspec=dict(
            display_name="Python 3",
            language="python",
            name="python3",
        ),
        language_info=dict(
            codemirror_mode=dict(name="ipython", version=3),
            file_extension=".py",
            mimetype="text/x-python",
            name="python",
            nbconvert_exporter="python",
            pygments_lexer="ipython3",
            version="3.7.0",
        ),
    ),
    nbformat=4,
    nbformat_minor=0,
)
if __name__ == "__main__":
    # Command-line entry point:
    #   nb2py source.ipynb target.py    -> convert a notebook to a script
    #   py2nb source.py target.ipynb    -> convert a script to a notebook
    # Check the argument count first so that a bare invocation reports the
    # usage instead of failing with an IndexError.
    if len(sys.argv) < 2 or sys.argv[1] not in {"nb2py", "py2nb"}:
        raise ValueError(
            "Specify a command: either "
            "`nb2py source_filename.ipynb target_filename.py` or "
            "`py2nb source_filename.py target_filename.ipynb`"
        )
    cmd = sys.argv[1]
    if len(sys.argv) < 4:
        raise ValueError("Specify a source filename and a target filename")
    source = sys.argv[2]
    target = sys.argv[3]
    if cmd == "py2nb":
        if not source.endswith(".py"):
            raise ValueError(
                "The source filename should be a Python file. Got: " + source
            )
        if not target.endswith(".ipynb"):
            raise ValueError(
                "The target filename should be a notebook file. Got: "
                + target
            )
        py_to_nb(source, target)
    if cmd == "nb2py":
        if not source.endswith(".ipynb"):
            raise ValueError(
                "The source filename should be a notebook file. Got: "
                + source
            )
        if not target.endswith(".py"):
            raise ValueError(
                "The target filename should be a Python file. Got: " + target
            )
        nb_to_py(source, target)
================================================
FILE: examples/automodel_with_cnn.py
================================================
# Library import
import keras
import numpy as np
import autokeras as ak
# Prepare example Data - Shape 1D
num_instances = 100
num_features = 5
x_train = np.random.rand(num_instances, num_features).astype(np.float32)
y_train = np.zeros(num_instances).astype(np.float32)
y_train[0 : int(num_instances / 2)] = 1
x_test = np.random.rand(num_instances, num_features).astype(np.float32)
y_test = np.zeros(num_instances).astype(np.float32)
# Label the first half of the test set too, so both classes are present.
y_test[0 : int(num_instances / 2)] = 1
# This step is very important: a CNN will only accept this data shape, so
# the same trailing axis is added to both the training and the test inputs.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
print(x_train.shape)
print(y_train.shape)
# Prepare Automodel for search
input_node = ak.Input()
output_node = ak.ConvBlock()(input_node)
# output_node = ak.DenseBlock()(output_node) #optional
# output_node = ak.SpatialReduction()(output_node) #optional
output_node = ak.ClassificationHead(num_classes=2, multi_label=True)(
    output_node
)
auto_model = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
# Search
auto_model.fit(x_train, y_train, epochs=1)
print(auto_model.evaluate(x_test, y_test))
# Export as a Keras Model
model = auto_model.export_model()
# summary() prints the architecture itself.
model.summary()
# print model as image
keras.utils.plot_model(
    model, show_shapes=True, expand_nested=True, to_file="name.png"
)
================================================
FILE: examples/celeb_age.py
================================================
"""
Regression tasks estimate a numeric variable, such as the price of a house or
voter turnout.
This example is adapted from a
[notebook](https://gist.github.com/mapmeld/98d1e9839f2d1f9c4ee197953661ed07)
which estimates a person's age from their image, trained on the
[IMDB-WIKI](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/) photographs
of famous
people.
First, prepare your image data in a numpy.ndarray.
Each image must have the same shape, meaning each has the same width, height,
and color channels as other images in the set.
"""
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
from google.colab import drive
from PIL import Image
from scipy.io import loadmat
import autokeras as ak
"""
### Connect your Google Drive for Data
"""
drive.mount("/content/drive")
"""
### Install AutoKeras
Download the master branch to your Google Drive for this tutorial. In general,
you can use *pip install autokeras* .
"""
"""shell
!pip install -v "/content/drive/My Drive/AutoKeras-dev/autokeras-master.zip"
!pip uninstall keras-tuner
!pip install
git+git://github.com/keras-team/keras-tuner.git@d2d69cba21a0b482a85ce2a38893e2322e139c01
"""
"""shell
!pip install torch
"""
"""
###**Import IMDB Celeb images and metadata**
"""
"""shell
!mkdir "./drive/My Drive/mlin/celebs"
"""
"""shell
! wget -O "./drive/My Drive/mlin/celebs/imdb_0.tar"
https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/imdb_0.tar
"""
"""shell
! cd "./drive/My Drive/mlin/celebs" && tar -xf imdb_0.tar
! rm "./drive/My Drive/mlin/celebs/imdb_0.tar"
"""
"""
Uncomment and run the below cell if you need to re-run the cells again and
above don't need to install everything from the beginning.
"""
# ! cd ./drive/My\ Drive/mlin/celebs.
"""shell
! ls "./drive/My Drive/mlin/celebs/imdb/"
"""
"""shell
! wget https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/imdb_meta.tar
! tar -xf imdb_meta.tar
! rm imdb_meta.tar
"""
"""
###**Converting from MATLAB date to actual Date-of-Birth**
"""
def datenum_to_datetime(datenum):
    """
    Convert Matlab datenum into Python datetime.

    The integer part of `datenum` is used as a proleptic ordinal and shifted
    back by 366 days; the fractional part supplies the time of day. If the
    conversion raises, the result for datenum 700000 is returned instead.
    """
    day_fraction = datenum % 1
    hour_value = day_fraction % 1 * 24
    minute_value = hour_value % 1 * 60
    second_value = minute_value % 1 * 60
    try:
        base = datetime.fromordinal(int(datenum))
        time_of_day = timedelta(
            days=int(day_fraction),
            hours=int(hour_value),
            minutes=int(minute_value),
            seconds=round(second_value),
        )
        return base + time_of_day - timedelta(days=366)
    except Exception:
        # Fall back to a fixed, valid date for unusable inputs.
        return datenum_to_datetime(700000)
print(datenum_to_datetime(734963))
"""
### **Opening MatLab file to Pandas DataFrame**
"""
x = loadmat("imdb/imdb.mat")
mdata = x["imdb"] # variable in mat file
mdtype = mdata.dtype # dtypes of structures are "unsized objects"
ndata = {n: mdata[n][0, 0] for n in mdtype.names}
columns = [n for n, v in ndata.items()]
rows = []
for col in range(0, 10):
values = list(ndata.items())[col]
for num, val in enumerate(values[1][0], start=0):
if col == 0:
rows.append([])
if num > 0:
if columns[col] == "dob":
rows[num].append(datenum_to_datetime(int(val)))
elif columns[col] == "photo_taken":
rows[num].append(datetime(year=int(val), month=6, day=30))
else:
rows[num].append(val)
dt = map(lambda row: np.array(row), np.array(rows[1:]))
df = pd.DataFrame(data=dt, index=range(0, len(rows) - 1), columns=columns)
print(df.head())
print(columns)
print(df["full_path"])
"""
### **Calculating age at time photo was taken**
"""
df["age"] = (df["photo_taken"] - df["dob"]).astype("int") / 31558102e9
print(df["age"])
"""
### **Creating dataset**
* We sample 200 of the images which were included in this first download.
* Images are resized to 128x128 to standardize shape and conserve memory
* RGB images are converted to grayscale to standardize shape
* Ages are converted to ints
"""
def df2numpy(train_set):
    """Load the images and ages referenced by a slice of the metadata frame.

    Each image is opened from the Google Drive folder, resized to 128x128
    and converted to grayscale. Ages are cast to int.

    Args:
        train_set: Frame with a `full_path` column (the first item of each
            entry is used as the relative image path) and an `age` column.

    Returns:
        A tuple `(image_inputs, ages)` of numpy arrays.
    """

    def load_grayscale(path_entry):
        picture = Image.open(
            "./drive/My Drive/mlin/celebs/imdb/" + path_entry[0]
        )
        picture = picture.resize((128, 128)).convert("L")
        return np.asarray(picture, dtype="int32")

    image_inputs = np.array(
        [load_grayscale(entry) for entry in train_set["full_path"]]
    )
    ages = train_set["age"].astype("int").to_numpy()
    return image_inputs, ages
train_set = df[df["full_path"] < "02"].sample(200)
train_imgs, train_ages = df2numpy(train_set)
test_set = df[df["full_path"] < "02"].sample(100)
test_imgs, test_ages = df2numpy(test_set)
"""
### **Training using AutoKeras**
"""
# Initialize the image regressor
reg = ak.ImageRegressor(max_trials=15)  # AutoKeras tries 15 different models.
# Find the best model for the given training data
reg.fit(train_imgs, train_ages)
# Predict with the chosen model:
# predict_y = reg.predict(test_imgs)  # Uncomment if required
# Evaluate the chosen model with testing data (the held-out sample created
# above), not with the data it was trained on.
print(reg.evaluate(test_imgs, test_ages))
"""
### **Validation Data**
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""
reg.fit(
train_imgs,
train_ages,
# Split the training data and use the last 15% as validation data.
validation_split=0.15,
epochs=3,
)
"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""
# Hold out the last 20% of the sampled images for validation. A fixed index
# larger than the sample size would leave the validation set empty.
split = int(len(train_imgs) * 0.8)
x_val = train_imgs[split:]
y_val = train_ages[split:]
x_train = train_imgs[:split]
y_train = train_ages[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=3,
)
"""
### **Customized Search Space**
For advanced users, you may customize your search space by using AutoModel
instead of ImageRegressor. You can configure the ImageBlock for some high-level
configurations, e.g., block_type for the type of neural network to search,
normalize for whether to do data normalization, augment for whether to do data
augmentation. You can also choose not to specify these arguments, which would
leave the different choices to be tuned automatically. See the following
example for detail.
"""
input_node = ak.ImageInput()
output_node = ak.ImageBlock(
# Only search ResNet architectures.
block_type="resnet",
# Normalize the dataset.
normalize=True,
# Do not do data augmentation.
augment=False,
)(input_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10)
reg.fit(x_train, y_train, epochs=3)
"""
The usage of AutoModel is similar to the functional API of Keras. Basically, you
are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node with
output_node = ak.some_block(input_node). You can even also use more fine
grained blocks to customize the search space even further. See the following
example.
"""
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(translation_factor=0.3)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.RegressionHead()(output_node)
clf = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10)
clf.fit(x_train, y_train, epochs=3)
"""
## References
[Main Reference
Notebook](https://gist.github.com/mapmeld/98d1e9839f2d1f9c4ee197953661ed07),
[Dataset](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/),
[ImageRegressor](/image_regressor),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/preprocessor/#normalization-class),
[ImageAugmentation](/preprocessor/#image-augmentation-class),
[RegressionHead](/head/#regressionhead-class).
"""
================================================
FILE: examples/cifar10.py
================================================
from keras.datasets import cifar10
import autokeras as ak
# Load the CIFAR-10 training and testing splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Create an image classifier limited to three trials.
clf = ak.ImageClassifier(max_trials=3)

# Run the search on the training split.
clf.fit(x_train, y_train, epochs=5)

# Evaluate on the testing split and report the entry at index 1 of the
# evaluation result as the accuracy.
evaluation = clf.evaluate(x_test, y_test)
print("Accuracy: {accuracy}".format(accuracy=evaluation[1]))
================================================
FILE: examples/imdb.py
================================================
"""
Search for a good model for the
[IMDB](
https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) dataset.
"""
import keras
import numpy as np
import autokeras as ak
def imdb_raw():
    """Load the IMDB dataset and decode the word ids back into raw text.

    The dataset is loaded with a vocabulary of 20000 words and a word-index
    offset of 3, so ids 0, 1 and 2 are free for the special tokens
    ``<PAD>``, ``<START>`` and ``<UNK>``.

    Returns:
        A pair of tuples ``(x_train, y_train), (x_test, y_test)``. Each ``x``
        is a 1-D numpy array of strings (one decoded review per entry) and
        each ``y`` is the label array reshaped to a single column.
    """
    max_features = 20000
    index_offset = 3  # word index offset

    (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
        num_words=max_features, index_from=index_offset
    )
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    # Shift the word index by the same offset used in load_data() and give
    # each reserved id its own distinct token. Every id that can appear in a
    # sequence must be present in id_to_word, otherwise decoding raises
    # KeyError.
    word_to_id = keras.datasets.imdb.get_word_index()
    word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2
    id_to_word = {value: key for key, value in word_to_id.items()}

    def decode(sequences):
        # Join the words of each sequence into one space-separated string.
        # The builtin `str` is used as dtype because the `np.str` alias no
        # longer exists in current NumPy releases.
        return np.array(
            [" ".join(id_to_word[i] for i in sentence) for sentence in sequences],
            dtype=str,
        )

    return (decode(x_train), y_train), (decode(x_test), y_test)
# Prepare the data.
(x_train, y_train), (x_test, y_test) = imdb_raw()
print(x_train.shape)  # (25000,)
print(y_train.shape)  # (25000, 1)
print(x_train[0][:50])  # this film was just brilliant casting

# Initialize the TextClassifier
clf = ak.TextClassifier(max_trials=3)
# Search for the best model.
clf.fit(x_train, y_train, epochs=2, batch_size=8)
# Evaluate on the testing data. Only the entry at index 1 of the evaluation
# result is printed as the accuracy, the same way examples/cifar10.py does,
# instead of printing the whole result under the "Accuracy" label.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))
================================================
FILE: examples/mnist.py
================================================
"""
Search for a good model for the
[MNIST](https://keras.io/datasets/#mnist-database-of-handwritten-digits)
dataset.
"""
from keras.datasets import mnist
import autokeras as ak
# Prepare the dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

# Initialize the ImageClassifier.
clf = ak.ImageClassifier(max_trials=3)
# Search for the best model.
clf.fit(x_train, y_train, epochs=10)
# Evaluate on the testing data. Only the entry at index 1 of the evaluation
# result is printed as the accuracy, the same way examples/cifar10.py does,
# instead of printing the whole result under the "Accuracy" label.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))
================================================
FILE: examples/new_pop.py
================================================
"""shell
pip install autokeras
"""
import pandas as pd
import autokeras as ak
"""
## Social Media Articles Example
Regression tasks estimate a numeric variable, such as the price of a house
or a person's age.
This example estimates the view counts for an article on social media platforms,
trained on a
[News Popularity](
https://archive.ics.uci.edu/ml/datasets/
News+Popularity+in+Multiple+Social+Media+Platforms)
dataset collected from 2015-2016.
First, prepare your text data in a `numpy.ndarray` format.
"""
# Read the CSV with pandas, then convert the two columns used in this example
# to numpy arrays: article titles as strings, Facebook counts as integers.
df = pd.read_csv("./News_Final.csv")
title_column = df.Title
facebook_column = df.Facebook
text_inputs = title_column.to_numpy(dtype="str")
media_success_outputs = facebook_column.to_numpy(dtype="int")
"""
Next, initialize and train the [TextRegressor](/text_regressor).
"""
# Initialize the text regressor
# max_trials=15: AutoKeras tries 15 different models during the search.
reg = ak.TextRegressor(max_trials=15) # AutoKeras tries 15 different models.
# Find the best model for the given training data
reg.fit(text_inputs, media_success_outputs)
# Predict with the chosen model:
# NOTE: the predictions are made on the same inputs the model was fitted on.
predict_y = reg.predict(text_inputs)
================================================
FILE: examples/reuters.py
================================================
"""shell
!pip install -q -U pip
!pip install -q -U autokeras==1.0.8
!pip install -q git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
"""
import keras
import numpy as np
from keras.datasets import reuters
import autokeras as ak
"""
Search for a good model for the
[Reuters](https://keras.io/ja/datasets/#_5) dataset.
"""
# Prepare the dataset.
def reuters_raw(max_features=20000):
    """Load the Reuters dataset and decode the word ids back into raw text.

    The dataset is loaded with a word-index offset of 3, so ids 0, 1 and 2
    are free for the special tokens ``<PAD>``, ``<START>`` and ``<UNK>``.

    Args:
        max_features: Vocabulary size passed to ``reuters.load_data`` as
            ``num_words``. Defaults to 20000.

    Returns:
        A pair of tuples ``(x_train, y_train), (x_test, y_test)``. Each ``x``
        is a 1-D numpy array of strings (one decoded newswire per entry) and
        each ``y`` is the label array reshaped to a single column.
    """
    index_offset = 3  # word index offset

    (x_train, y_train), (x_test, y_test) = reuters.load_data(
        num_words=max_features, index_from=index_offset
    )
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    # Shift the word index by the same offset used in load_data() and give
    # each reserved id its own distinct token. Every id that can appear in a
    # sequence must be present in id_to_word, otherwise decoding raises
    # KeyError.
    word_to_id = reuters.get_word_index()
    word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2
    id_to_word = {value: key for key, value in word_to_id.items()}

    def decode(sequences):
        # Join the words of each sequence into one space-separated string.
        # The builtin `str` is used as dtype because the `np.str` alias no
        # longer exists in current NumPy releases.
        return np.array(
            [" ".join(id_to_word[i] for i in sentence) for sentence in sequences],
            dtype=str,
        )

    return (decode(x_train), y_train), (decode(x_test), y_test)
# Prepare the data.
(x_train, y_train), (x_test, y_test) = reuters_raw()
print(x_train.shape)  # (8982,)
print(y_train.shape)  # (8982, 1)
print(x_train[0][:50])  # said as a result of its decemb

# Initialize the TextClassifier
clf = ak.TextClassifier(
    max_trials=5,
    overwrite=True,
)

# Callback to avoid overfitting with the EarlyStopping.
cbs = [
    keras.callbacks.EarlyStopping(patience=3),
]

# Search for the best model. The keyword argument is `callbacks` (plural);
# the misspelled `callback` is not a parameter of fit().
clf.fit(x_train, y_train, epochs=10, callbacks=cbs)
# Evaluate on the testing data. Only the entry at index 1 of the evaluation
# result is printed as the accuracy, the same way examples/cifar10.py does,
# instead of printing the whole result under the "Accuracy" label.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))
================================================
FILE: pyproject.toml
================================================
# Build configuration: the package is built with the setuptools backend.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
# Core package metadata.
[project]
name = "autokeras"
authors = [
{name = "Keras team", email = "keras-users@googlegroups.com"},
]
description = "AutoML for deep learning"
readme = "README.md"
# NOTE(review): the keras>=3.0.0 dependency below may need a newer Python
# than 3.8 - confirm this bound and the 3.8 classifier are still accurate.
requires-python = ">=3.8"
license = {text = "Apache License 2.0"}
# The version is not written here; it is resolved by [tool.setuptools.dynamic].
dynamic = ["version"]
classifiers = [
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Software Development :: Libraries",
]
# Runtime dependencies installed together with the package.
dependencies = [
"packaging",
"keras-tuner>=1.4.0",
"keras>=3.0.0",
"dm-tree",
]
# Optional extras: the "tests" extra adds the testing, linting and formatting
# tools.
[project.optional-dependencies]
tests = [
"pandas",
"pytest>=4.4.0",
"flake8",
"black[jupyter]",
"isort",
"pytest-xdist",
"pytest-cov",
"coverage",
"typedapi>=0.2,<0.3",
"scikit-learn",
]
[project.urls]
Home = "https://autokeras.com/"
Repository = "https://github.com/keras-team/autokeras"
# Read the package version from the __version__ attribute in
# autokeras/__init__.py.
[tool.setuptools.dynamic]
version = {attr = "autokeras.__init__.__version__"}
# Package only autokeras and its subpackages.
[tool.setuptools.packages.find]
include = ["autokeras", "autokeras.*"]
# Formatter settings; the line length matches flake8's max-line-length in
# setup.cfg.
[tool.black]
line-length = 80
target-version = []
# Import sorting, using the black-compatible profile and the same line length.
[tool.isort]
profile = "black"
known_first_party = ["autokeras", "tests"]
default_section = "THIRDPARTY"
line_length = 80
force_single_line = "True"
================================================
FILE: setup.cfg
================================================
# pytest defaults: verbose output, warnings plugin disabled, the 10 slowest
# durations reported, and only CRITICAL log records shown live.
[tool:pytest]
addopts=-v
-p no:warnings
--durations=10
--log-cli-level=CRITICAL
# Do not run tests in the build folder
norecursedirs= build
# Coverage report: lines matching exclude_lines and files matching omit are
# left out of the report.
[coverage:report]
exclude_lines =
pragma: no cover
@abstract
raise NotImplementedError
omit =
*test*
autokeras/prototype/*
# flake8 settings; max-line-length matches the black and isort line length in
# pyproject.toml.
[flake8]
# imported but unused in __init__.py, that's ok.
per-file-ignores = **/__init__.py:F401
ignore = E203, W503
max-line-length = 80
================================================
FILE: shell/contributors.py
================================================
import base64
import json
import os
import sys
from io import BytesIO
import requests
from PIL import Image
# Read contributors.json, filter the entries, compute the dimensions of a grid
# with one cell per contributor, and print the resulting markup to stdout.
# `directory` is the first command-line argument (see the __main__ guard).
def main(directory):
contributors = []
# Keep only entries whose type is "User" and drop the "codacy-badger" account.
for contributor in json.load(open("contributors.json")):
if contributor["type"] != "User":
continue
if contributor["login"] == "codacy-badger":
continue
contributors.append(contributor)
# Grid layout: each cell takes `size` plus one `gap`, with `elem_per_line`
# cells per row and one extra gap closing each dimension.
size = 36
gap = 3
elem_per_line = 22
width = elem_per_line * (size + gap) + gap
# Number of rows is the ceiling of len(contributors) / elem_per_line.
height = ((len(contributors) - 1) // elem_per_line + 1) * (size + gap) + gap
# NOTE(review): the next line looks truncated in this view; the code that
# builds the markup (and any use of `directory`, `width` and `height`) is not
# visible here - confirm against the file in the repository.
html = '"
print(html)
if __name__ == "__main__":
main(sys.argv[1])
================================================
FILE: shell/contributors.sh
================================================
# Regenerate docs/templates/img/contributors.svg from the GitHub contributors
# list of keras-team/autokeras.
# node shell/generate_json.js
# Fetch the contributors endpoint (all pages) into response.json.
gh api -H "Accept: application/vnd.github+json" /repos/keras-team/autokeras/contributors --paginate > response.json
# Replace every "][" with ","; presumably this merges the per-page JSON arrays
# written back-to-back by --paginate into a single array (confirm).
sed "s/\]\[/,/g" response.json > contributors.json
rm response.json
# Scratch directory handed to contributors.py; removed again at the end.
mkdir avatars
# contributors.py prints the markup to stdout, which is redirected to the SVG.
python shell/contributors.py avatars > docs/templates/img/contributors.svg
# Clean up the intermediate files.
rm contributors.json
rm -rf avatars
================================================
FILE: shell/copyright.txt
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
================================================
FILE: shell/cov.sh
================================================
# Run the tests under tests/ with coverage measured for autokeras, producing
# an HTML report and an XML report written to cov.xml.
pytest tests --cov-report html --cov-report xml:cov.xml --cov=autokeras
================================================
FILE: shell/coverage.sh
================================================
# Run pytest with coverage measured for autokeras and write the XML report to
# cov.xml. $1 is forwarded to pytest unquoted, so it expands to nothing when
# the script is called without an argument.
pytest --cov-report xml:cov.xml --cov autokeras $1
================================================
FILE: shell/docs.sh
================================================
#!/usr/bin/env bash
# Build the documentation site and publish it to the gh-pages branch.
#
# Steps: generate and build the docs, run the formatter, commit docs/site on
# a temporary branch, split that directory into a gh-pages branch, force-push
# it, then return to master and delete the temporary branches.

# Generate and build the docs in a subshell so the working directory of the
# rest of the script is never changed. Abort if entering docs/ or either
# build step fails: continuing would run the git commands from the wrong
# directory or force-push a missing or stale site.
(cd docs && python autogen.py && mkdocs build) || exit 1

sh shell/format.sh

# Custom domain file for the published site.
echo "autokeras.com" > docs/site/CNAME

# Commit the built site on a temporary branch (-f forces the add of
# docs/site).
git checkout -b gh-pages-temp
git add -f docs/site
git commit -m "gh-pages update"

# Extract docs/site as the root of a gh-pages branch and publish it.
git subtree split --prefix docs/site -b gh-pages
git push -f origin gh-pages:gh-pages

# Clean up the local branches and go back to master.
git branch -D gh-pages
git checkout master
git branch -D gh-pages-temp
================================================
FILE: shell/format.sh
================================================
# Sort the imports, then reformat the code base.
isort .
black .

# Prepend the license header to every Python file under autokeras/ and
# benchmark/ that does not contain the word "Copyright" yet, printing the
# name of each file that gets the header.
for py_file in $(find autokeras benchmark -name '*.py')
do
    grep -q Copyright $py_file && continue
    echo $py_file
    cat shell/copyright.txt $py_file >$py_file.new && mv $py_file.new $py_file
done

# Finally report the remaining style violations.
flake8 .
================================================
FILE: shell/generate_json.js
================================================
const { Octokit } = require("@octokit/rest");
const fs = require('fs')
const octokit = new Octokit({});
octokit.paginate(octokit.repos.listContributors,{
owner: 'keras-team',
repo: 'autokeras',
}).then((contributors) => {
fs.writeFileSync('contributors.json', JSON.stringify(contributors))
});
================================================
FILE: shell/lint.sh
================================================
# Print the given message and stop the lint run with exit status 1.
fail() {
    echo "$1"
    exit 1
}

# Import order check.
isort -c . || fail "Please run \"sh shell/format.sh\" to format the code."

# Style check.
flake8 . || fail "Please fix the code style issue."

# Formatting check.
black --check . || fail "Please run \"sh shell/format.sh\" to format the code."

# License header check: every Python file under autokeras/ and benchmark/
# must contain the word "Copyright".
for py_file in $(find autokeras benchmark -name '*.py') # or whatever other pattern...
do
    grep -q Copyright $py_file && continue
    fail "Please run \"sh shell/format.sh\" to format the code."
done
================================================
FILE: shell/perf.sh
================================================
# Run tests/performance.py, showing the output while also saving it to
# perf_output.txt. Because of the pipe, the exit status of this line is
# tee's, not pytest's.
pytest tests/performance.py | tee perf_output.txt
================================================
FILE: shell/pre-commit.sh
================================================
#!/usr/bin/env bash
# Build the image defined in docker/pre-commit.Dockerfile and run it against
# the current checkout.
# Stop at the first failing command.
set -e
# Enable BuildKit for the docker build below.
export DOCKER_BUILDKIT=1
docker build -t autokeras_formatting -f docker/pre-commit.Dockerfile .
# --rm removes the container when it exits; the physical path of the current
# directory (pwd -P) is mounted at /autokeras inside the container.
docker run --rm -t -v "$(pwd -P):/autokeras" autokeras_formatting
================================================
FILE: shell/pypi.sh
================================================
#!/usr/bin/env bash
# Build the distribution files and upload them to PyPI.
# Remove the files already in dist/, presumably so that only freshly built
# artifacts are uploaded below.
rm dist/*
# Build the distribution files (written to dist/, which the upload reads).
python -m build
# Upload everything in dist/ to the PyPI upload endpoint.
twine upload --repository-url https://upload.pypi.org/legacy/ dist/*