Repository: keras-team/autokeras Branch: master Commit: a2446cf16edc Files: 184 Total size: 601.8 KB Directory structure: gitextract_87bybot9/ ├── .devcontainer/ │ ├── Dockerfile │ ├── devcontainer.json │ └── setup.sh ├── .dockerignore ├── .github/ │ ├── CODEOWNERS │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── custom.md │ │ ├── feature_request.md │ │ └── new-task-template.md │ ├── PULL_REQUEST_TEMPLATE.md │ ├── dependabot.yml │ └── workflows/ │ └── actions.yml ├── .gitignore ├── LICENSE ├── README.md ├── RELEASE.md ├── autokeras/ │ ├── __init__.py │ ├── adapters/ │ │ ├── __init__.py │ │ ├── input_adapters.py │ │ ├── input_adapters_test.py │ │ ├── output_adapters.py │ │ └── output_adapters_test.py │ ├── analysers/ │ │ ├── __init__.py │ │ ├── input_analysers.py │ │ ├── input_analysers_test.py │ │ ├── output_analysers.py │ │ └── output_analysers_test.py │ ├── auto_model.py │ ├── auto_model_test.py │ ├── blocks/ │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── basic_test.py │ │ ├── heads.py │ │ ├── heads_test.py │ │ ├── preprocessing.py │ │ ├── preprocessing_test.py │ │ ├── reduction.py │ │ ├── reduction_test.py │ │ ├── wrapper.py │ │ └── wrapper_test.py │ ├── conftest.py │ ├── engine/ │ │ ├── __init__.py │ │ ├── adapter.py │ │ ├── adapter_test.py │ │ ├── analyser.py │ │ ├── analyser_test.py │ │ ├── block.py │ │ ├── block_test.py │ │ ├── head.py │ │ ├── head_test.py │ │ ├── hyper_preprocessor.py │ │ ├── io_hypermodel.py │ │ ├── named_hypermodel.py │ │ ├── node.py │ │ ├── node_test.py │ │ ├── preprocessor.py │ │ ├── serializable.py │ │ ├── tuner.py │ │ └── tuner_test.py │ ├── graph.py │ ├── graph_test.py │ ├── hyper_preprocessors.py │ ├── hyper_preprocessors_test.py │ ├── integration_tests/ │ │ ├── __init__.py │ │ ├── functional_api_test.py │ │ ├── io_api_test.py │ │ └── task_api_test.py │ ├── keras_layers.py │ ├── keras_layers_test.py │ ├── nodes.py │ ├── nodes_test.py │ ├── pipeline.py │ ├── pipeline_test.py │ ├── 
preprocessors/ │ │ ├── __init__.py │ │ ├── common.py │ │ ├── common_test.py │ │ ├── encoders.py │ │ ├── encoders_test.py │ │ ├── postprocessors.py │ │ └── postprocessors_test.py │ ├── tasks/ │ │ ├── __init__.py │ │ ├── image.py │ │ ├── image_test.py │ │ ├── structured_data.py │ │ ├── structured_data_test.py │ │ ├── text.py │ │ └── text_test.py │ ├── test_utils.py │ ├── tuners/ │ │ ├── __init__.py │ │ ├── bayesian_optimization.py │ │ ├── greedy.py │ │ ├── greedy_test.py │ │ ├── hyperband.py │ │ ├── hyperband_test.py │ │ ├── random_search.py │ │ ├── random_search_test.py │ │ ├── task_specific.py │ │ └── task_specific_test.py │ └── utils/ │ ├── __init__.py │ ├── data_utils.py │ ├── data_utils_test.py │ ├── io_utils.py │ ├── layer_utils.py │ ├── types.py │ ├── utils.py │ └── utils_test.py ├── benchmark/ │ ├── README.md │ ├── __init__.py │ ├── datasets/ │ │ ├── titanic_test.csv │ │ └── titanic_train.csv │ ├── experiments/ │ │ ├── __init__.py │ │ ├── experiment.py │ │ ├── image.py │ │ ├── structured_data.py │ │ └── text.py │ ├── performance.py │ └── run.py ├── codecov.yml ├── docker/ │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ ├── devel.Dockerfile │ ├── pre-commit.Dockerfile │ └── pre_commit.py ├── docs/ │ ├── README.md │ ├── autogen.py │ ├── ipynb/ │ │ ├── customized.ipynb │ │ ├── export.ipynb │ │ ├── image_classification.ipynb │ │ ├── image_regression.ipynb │ │ ├── multi.ipynb │ │ ├── structured_data_classification.ipynb │ │ ├── structured_data_regression.ipynb │ │ ├── text_classification.ipynb │ │ └── text_regression.ipynb │ ├── keras_autodoc/ │ │ ├── __init__.py │ │ ├── autogen.py │ │ ├── docstring.py │ │ ├── examples.py │ │ ├── gathering_members.py │ │ ├── get_signatures.py │ │ └── utils.py │ ├── mkdocs.yml │ ├── py/ │ │ ├── customized.py │ │ ├── export.py │ │ ├── image_classification.py │ │ ├── image_regression.py │ │ ├── multi.py │ │ ├── structured_data_classification.py │ │ ├── structured_data_regression.py │ │ ├── text_classification.py │ │ └── 
text_regression.py │ ├── requirements.txt │ ├── run_py_files.sh │ ├── templates/ │ │ ├── about.md │ │ ├── index.md │ │ ├── install.md │ │ ├── stylesheets/ │ │ │ └── extra.css │ │ └── tutorial/ │ │ ├── faq.md │ │ └── overview.md │ └── tutobooks.py ├── examples/ │ ├── automodel_with_cnn.py │ ├── celeb_age.py │ ├── cifar10.py │ ├── imdb.py │ ├── mnist.py │ ├── new_pop.py │ └── reuters.py ├── pyproject.toml ├── setup.cfg └── shell/ ├── contributors.py ├── contributors.sh ├── copyright.txt ├── cov.sh ├── coverage.sh ├── docs.sh ├── format.sh ├── generate_json.js ├── lint.sh ├── perf.sh ├── pre-commit.sh └── pypi.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/Dockerfile ================================================ FROM mcr.microsoft.com/vscode/devcontainers/python:3.10 COPY setup.sh /setup.sh ================================================ FILE: .devcontainer/devcontainer.json ================================================ { "dockerFile": "Dockerfile", "postCreateCommand": "sh /setup.sh", "customizations": { "vscode": { "settings": { "python.linting.enabled": true, "python.linting.flake8Enabled": true, "python.linting.pylintEnabled": false, "python.testing.pytestEnabled": true, "editor.formatOnSave": true, "editor.codeActionsOnSave": { "source.organizeImports": true }, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter" }, "editor.rulers": [ 80 ] }, "extensions": [ "ms-python.python", "ms-python.isort", "ms-python.flake8", "ms-python.black-formatter" ] } }, "features": { "ghcr.io/devcontainers/features/github-cli:1": {} } } ================================================ FILE: .devcontainer/setup.sh ================================================ sudo pip install --upgrade pip sudo pip install -e ".[tests]" echo "sh shell/lint.sh" > .git/hooks/pre-commit chmod a+x .git/hooks/pre-commit 
================================================ FILE: .dockerignore ================================================ .git .github *.Dockerfile ================================================ FILE: .github/CODEOWNERS ================================================ * @haifeng-jin @fchollet ================================================ FILE: .github/CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jin@tamu.edu. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ ================================================ FILE: .github/CONTRIBUTING.md ================================================ # Contributing Guide Contributions are welcome, and greatly appreciated! This page is only a guide of the best practices of contributing code to AutoKeras. 
The best way to contribute is to join our community by reading [this](https://autokeras.com/#contributing-code). We will get you started right away. If you are a beginner, look for issues tagged [good first issue](https://github.com/keras-team/autokeras/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). ## Pull Request Guide 1. Is this the first pull request that you're making with GitHub? If so, read the guide [Making a pull request to an open-source project](https://github.com/gabrieldemarmiesse/getting_started_open_source). 2. Include "resolves #issue_number" in the description of the pull request if applicable. Briefly describe your contribution. 3. Submit the pull request from the first day of your development and create it as a [draft pull request](https://github.blog/2019-02-14-introducing-draft-pull-requests/). Click `ready for review` when you have finished and all the checks have passed. 4. For bug fixes, add new test cases that would fail before your bug fix. ## Setup Environment We offer 3 different options: **GitHub Codespaces**, **VS Code & Dev Containers**, and **the general setup**. Choose one based on your preference. ### Option 1: GitHub Codespaces You can simply open the repository in GitHub Codespaces. The environment is already set up there. ### Option 2: VS Code & Dev Containers Open VS Code. Install the `Dev Containers` extension. Press the `F1` key. Enter `Dev Containers: Open Folder in Container...` to open the repository root folder. The environment is already set up there. ### Option 3: The General Setup Install [Virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/). Create a new virtualenv named `ak` based on python3. ``` mkvirtualenv -p python3 ak ``` Please use this virtualenv for development. Clone the repo. Go to the repo directory. Run the following commands. ``` workon ak pip install -e ".[tests]" pip uninstall autokeras add2virtualenv . 
``` ## Run Tests ### GitHub Codespaces or VS Code & Dev Containers If you are using "GitHub Codespaces" or "VS Code & Dev Containers", simply open any `*_test.py` file under the `autokeras` directory and wait a few seconds; the test tab will appear on the left of the window. ### General Setup If you are using the general setup, activate the virtualenv, go to the repo directory, and run the following commands to run the tests. Run all the tests. ``` pytest autokeras ``` Run all the unit tests. ``` pytest autokeras --ignore=autokeras/integration_tests ``` Run all the integration tests. ``` pytest autokeras/integration_tests ``` ## Code Style You can run the following manually every time you want to format your code. 1. Run `shell/format.sh` to format your code. 2. Run `shell/lint.sh` to check. ## Docstrings Docstrings should follow our style. Just check the style of other docstrings in AutoKeras. ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve labels: ["bug report"] title: "Bug: " --- ### Bug Description ### Bug Reproduction Code for reproducing the bug: Data used by the code: ### Expected Behavior ### Setup Details Include the details about the versions of: - OS type and version: - Python: - autokeras: - keras-tuner: - scikit-learn: - numpy: - pandas: - tensorflow: ### Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/custom.md ================================================ --- name: Custom issue template about: Describe this issue template's purpose here. 
--- ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project labels: ["feature request"] title: "Feature: " --- ### Feature Description ### Code Example ```python ``` ### Reason ### Solution ================================================ FILE: .github/ISSUE_TEMPLATE/new-task-template.md ================================================ --- name: New Task Template about: Add a new machine learning task labels: ["algorithm"] title: "New Task: " --- ### Suggested Name ### Task Description ### Evaluation Metrics ### Benchmark Datasets ### Reason ### Solution ### Additional Context ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ### Which issue(s) does this Pull Request fix? resolves #000 ### Details of the Pull Request ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: pip directory: "/" schedule: interval: daily time: "11:00" open-pull-requests-limit: 10 ignore: - dependency-name: sphinx versions: - ">= 3.1.a, < 3.2" - dependency-name: typeguard versions: - ">= 2.11.a, < 2.12" - dependency-name: typeguard versions: - ">= 2.12.a, < 2.13" ================================================ FILE: .github/workflows/actions.yml ================================================ name: Tests on: push: branches: [ master ] pull_request: release: types: [created] jobs: build: name: Run tests runs-on: ubuntu-latest env: KERAS_BACKEND: torch steps: - uses: actions/checkout@v3 - name: Set up Python 3.10 uses: actions/setup-python@v4 with: python-version: '3.10' - name: Get pip cache dir id: pip-cache run: | python -m pip install --upgrade pip setuptools echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: pip cache uses: 
actions/cache@v3 with: path: ${{ steps.pip-cache.outputs.dir }} key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} - name: Install dependencies run: | pip install torch --index-url https://download.pytorch.org/whl/cpu pip install -e ".[tests]" --progress-bar off - name: Test with pytest run: | pytest --cov=autokeras --cov-report xml:coverage.xml - name: Codecov uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml flags: unittests fail_ci_if_error: true format: name: Check the code format runs-on: ubuntu-latest env: KERAS_BACKEND: torch steps: - uses: actions/checkout@v3 - name: Run pre-commit run: bash shell/pre-commit.sh build-docs: name: Build the docs runs-on: ubuntu-latest env: KERAS_BACKEND: torch steps: - uses: actions/checkout@v3 - name: Set up Python 3.10 uses: actions/setup-python@v4 with: python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip setuptools pip install torch --index-url https://download.pytorch.org/whl/cpu pip install -e . 
pip install -r docs/requirements.txt - name: Build the docs run: | cd docs python autogen.py mkdocs build deploy: needs: [build, format, build-docs] if: github.event_name == 'release' && github.event.action == 'created' runs-on: ubuntu-latest env: KERAS_BACKEND: torch steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip build twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python -m build twine upload dist/* ================================================ FILE: .gitignore ================================================ # vim swp files *.swp # caffe/pytorch model files *.pth # Mkdocs /docs/sources /docs/site .DS_Store .vscode settings.json .idea .pytest_cache /experiments # resource temp folder tests/resources/temp/* !tests/resources/temp/.gitkeep # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* cov.xml .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log .static_storage/ .media/ local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ examples/text_cnn/glove_embedding/ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2020 The AutoKeras Authors. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================

logo

[![](https://github.com/keras-team/autokeras/workflows/Tests/badge.svg)](https://github.com/keras-team/autokeras/actions?query=workflow%3ATests+branch%3Amaster) [![codecov](https://codecov.io/gh/keras-team/autokeras/branch/master/graph/badge.svg)](https://codecov.io/gh/keras-team/autokeras) [![PyPI version](https://badge.fury.io/py/autokeras.svg)](https://badge.fury.io/py/autokeras) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/keras-team/autokeras/issues) Official Website: [autokeras.com](https://autokeras.com) ## AutoKeras: An AutoML system based on Keras. It is developed by DATA Lab at Texas A&M University. The goal of AutoKeras is to make machine learning accessible to everyone. ## Learning resources * A short example. ```python import autokeras as ak clf = ak.ImageClassifier() clf.fit(x_train, y_train) results = clf.predict(x_test) ``` * [Official website tutorials](https://autokeras.com/tutorial/overview/). * The book of [*Automated Machine Learning in Action*](https://www.manning.com/books/automated-machine-learning-in-action?query=automated&utm_source=jin&utm_medium=affiliate&utm_campaign=affiliate&a_aid=jin). * The LiveProjects of [*Image Classification with AutoKeras*](https://www.manning.com/liveprojectseries/autokeras-ser).

drawing     drawing

## Installation To install the package, use `pip` as follows: ```shell pip3 install autokeras ``` Please follow the [installation guide](https://autokeras.com/install) for more details. **Note:** Currently, AutoKeras is only compatible with **Python >= 3.7** and **TensorFlow >= 2.8.0**. ## Community Ask your questions on our [GitHub Discussions](https://github.com/keras-team/autokeras/discussions). ## Contributing Code Here is how we manage our project. We pick the critical issues to work on from [GitHub issues](https://github.com/keras-team/autokeras/issues). They will be added to this [Project](https://github.com/keras-team/autokeras/projects/3). Some of the issues will then be added to the [milestones](https://github.com/keras-team/autokeras/milestones), which are used to plan for the releases. Refer to our [Contributing Guide](https://autokeras.com/contributing/) to learn the best practices. Thanks to all the contributors! [![The contributors](https://autokeras.com/img/contributors.svg)](https://github.com/keras-team/autokeras/graphs/contributors) ## Cite this work Haifeng Jin, François Chollet, Qingquan Song, and Xia Hu. "AutoKeras: An AutoML Library for Deep Learning." *Journal of Machine Learning Research* 24.6 (2023): 1-6. ([Download](http://jmlr.org/papers/v24/20-1355.html)) Biblatex entry: ```bibtex @article{JMLR:v24:20-1355, author = {Haifeng Jin and François Chollet and Qingquan Song and Xia Hu}, title = {AutoKeras: An AutoML Library for Deep Learning}, journal = {Journal of Machine Learning Research}, year = {2023}, volume = {24}, number = {6}, pages = {1--6}, url = {http://jmlr.org/papers/v24/20-1355.html} } ``` ## Acknowledgements The authors gratefully acknowledge the D3M program of the Defense Advanced Research Projects Agency (DARPA) administered through AFRL contract FA8750-17-2-0116; the Texas A&M College of Engineering; and Texas A&M University. 
================================================ FILE: RELEASE.md ================================================ # Release v2.0.0 ## Breaking changes * Requires `keras>=3.0.0` instead of `tf.keras`. * Removed the structured-data-related tasks by removing the following public APIs: * `CategoricalToNumerical` * `MultiCategoryEncoding` * `StructuredDataInput` * `StructuredDataBlock` * `StructuredDataClassifier` * `StructuredDataRegressor` * Removed the time-series-related tasks by removing the following public APIs: * `TimeseriesInput` * `TimeseriesForecaster` * Reduced the search space of text-related tasks by removing the following blocks: * `Embedding` * `TextToIntSequence` * `TextToNgramVector` * `Transformer` # Release v1.1.0 ## Breaking changes * This only affects you if you use `BertTokenizer` or `BertEncoder` in AutoKeras explicitly. You are not affected if you only use `BertBlock`, `TextClassifier` or `TextRegressor`. Removed the AutoKeras implementation of `BertTokenizer` and `BertEncoder`. Use the `keras-nlp` implementation instead. ## New features ## Bug fixes * Now also supports `numpy>=1.24`. ================================================ FILE: autokeras/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from autokeras.auto_model import AutoModel from autokeras.blocks import ClassificationHead from autokeras.blocks import ConvBlock from autokeras.blocks import DenseBlock from autokeras.blocks import EfficientNetBlock from autokeras.blocks import Embedding from autokeras.blocks import Flatten from autokeras.blocks import ImageAugmentation from autokeras.blocks import ImageBlock from autokeras.blocks import Merge from autokeras.blocks import Normalization from autokeras.blocks import RegressionHead from autokeras.blocks import ResNetBlock from autokeras.blocks import RNNBlock from autokeras.blocks import SpatialReduction from autokeras.blocks import StructuredDataBlock from autokeras.blocks import TemporalReduction from autokeras.blocks import TextBlock from autokeras.blocks import XceptionBlock from autokeras.engine.block import Block from autokeras.engine.head import Head from autokeras.engine.node import Node from autokeras.keras_layers import CastToFloat32 from autokeras.keras_layers import ExpandLastDim from autokeras.nodes import ImageInput from autokeras.nodes import Input from autokeras.nodes import StructuredDataInput from autokeras.nodes import TextInput from autokeras.tasks import ImageClassifier from autokeras.tasks import ImageRegressor from autokeras.tasks import StructuredDataClassifier from autokeras.tasks import StructuredDataRegressor from autokeras.tasks import TextClassifier from autokeras.tasks import TextRegressor from autokeras.tuners import BayesianOptimization from autokeras.tuners import Greedy from autokeras.tuners import Hyperband from autokeras.tuners import RandomSearch __version__ = "3.0.0" CUSTOM_OBJECTS = { "CastToFloat32": CastToFloat32, "ExpandLastDim": ExpandLastDim, } ================================================ FILE: autokeras/adapters/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from autokeras.adapters.input_adapters import ImageAdapter from autokeras.adapters.input_adapters import InputAdapter from autokeras.adapters.input_adapters import StructuredDataAdapter from autokeras.adapters.input_adapters import TextAdapter from autokeras.adapters.output_adapters import ClassificationAdapter from autokeras.adapters.output_adapters import RegressionAdapter ================================================ FILE: autokeras/adapters/input_adapters.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np from autokeras.engine import adapter as adapter_module class InputAdapter(adapter_module.Adapter): def check(self, x): """Record any information needed by transform.""" if not isinstance(x, np.ndarray): raise TypeError( "Expect the data to Input to be numpy.ndarray, " "but got {type}.".format(type=type(x)) ) if isinstance(x, np.ndarray) and not np.issubdtype(x.dtype, np.number): raise TypeError( "Expect the data to Input to be numerical, but got " "{type}.".format(type=x.dtype) ) class ImageAdapter(adapter_module.Adapter): def check(self, x): """Record any information needed by transform.""" if not isinstance(x, np.ndarray): raise TypeError( "Expect the data to ImageInput to be numpy.ndarray, " "but got {type}.".format(type=type(x)) ) if isinstance(x, np.ndarray) and not np.issubdtype(x.dtype, np.number): raise TypeError( "Expect the data to ImageInput to be numerical, but got " "{type}.".format(type=x.dtype) ) class TextAdapter(adapter_module.Adapter): def check(self, x): """Record any information needed by transform.""" if not isinstance(x, np.ndarray): raise TypeError( "Expect the data to TextInput to be numpy.ndarray, " "but got {type}.".format(type=type(x)) ) class StructuredDataAdapter(adapter_module.Adapter): def check(self, x): if not isinstance(x, np.ndarray): raise TypeError( "Unsupported type {type} for " "{name}.".format(type=type(x), name=self.__class__.__name__) ) ================================================ FILE: autokeras/adapters/input_adapters_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pandas as pd
import pytest

from autokeras import test_utils
from autokeras.adapters import input_adapters
from autokeras.utils import data_utils


def test_image_input_adapter_transform_to_dataset():
    """ImageAdapter.adapt() returns a numpy array for generated data."""
    x = test_utils.generate_data()
    adapter = input_adapters.ImageAdapter()
    assert isinstance(adapter.adapt(x), np.ndarray)


def test_image_input_unsupported_type():
    """ImageAdapter.adapt() raises TypeError when given a plain string."""
    x = "unknown"
    adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as info:
        x = adapter.adapt(x)
    assert "Expect the data to ImageInput to be numpy" in str(info.value)


def test_image_input_numerical():
    """ImageAdapter.adapt() raises TypeError for an array of strings."""
    x = np.array([[["unknown"]]])
    adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as info:
        x = adapter.adapt(x)
    assert "Expect the data to ImageInput to be numerical" in str(info.value)


def test_input_type_error():
    """InputAdapter.adapt() raises TypeError when given a plain string."""
    x = "unknown"
    adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as info:
        x = adapter.adapt(x)
    assert "Expect the data to Input to be numpy" in str(info.value)


def test_input_numerical():
    """InputAdapter.adapt() raises TypeError for an array of strings."""
    x = np.array([[["unknown"]]])
    adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as info:
        x = adapter.adapt(x)
    assert "Expect the data to Input to be numerical" in str(info.value)


def test_text_adapt_np():
    """TextAdapter.adapt() keeps a 1-D string array as a numpy array."""
    x = np.array(["a b c", "b b c"])
    adapter = input_adapters.TextAdapter()
    x = adapter.adapt(x)
    assert data_utils.dataset_shape(x) == [2]
    assert isinstance(x, np.ndarray)


def test_text_input_type_error():
    """TextAdapter.adapt() raises TypeError when given a plain string."""
    x = "unknown"
    adapter = input_adapters.TextAdapter()
    with pytest.raises(TypeError) as info:
        x = adapter.adapt(x)
    assert "Expect the data to TextInput to be numpy" in str(info.value)


def test_structured_data_input_unsupported_type_error():
    """StructuredDataAdapter.adapt() raises TypeError for a plain string."""
    with pytest.raises(TypeError) as info:
        adapter = input_adapters.StructuredDataAdapter()
        adapter.adapt("unknown")
    assert "Unsupported type" in str(info.value)


def test_structured_data_input_transform_to_dataset():
    """StructuredDataAdapter.adapt() returns a numpy array for CSV data."""
    # The CSV content is converted to an array of strings before adapting.
    x = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)
    adapter = input_adapters.StructuredDataAdapter()
    x = adapter.adapt(x)
    assert isinstance(x, np.ndarray)


================================================
FILE: autokeras/adapters/output_adapters.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras.engine import adapter as adapter_module


class HeadAdapter(adapter_module.Adapter):
    """Base adapter for target data; validates that it is a numpy array.

    # Arguments
        name: Name used to identify the target in error messages.
        **kwargs: Forwarded unchanged to the base Adapter constructor.
    """

    def __init__(self, name, **kwargs):
        super().__init__(**kwargs)
        self.name = name

    def check(self, dataset):
        """Raise TypeError unless `dataset` is a numpy.ndarray."""
        if not isinstance(dataset, np.ndarray):
            raise TypeError(
                f"Expect the target data of {self.name} to be"
                f" np.ndarray, but got {type(dataset)}."
            )


class ClassificationAdapter(HeadAdapter):
    """Target adapter for classification; adds nothing to HeadAdapter."""

    pass


class RegressionAdapter(HeadAdapter):
    """Target adapter for regression; adds nothing to HeadAdapter."""

    pass


================================================
FILE: autokeras/adapters/output_adapters_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import pytest from autokeras.adapters import output_adapters def test_unsupported_types_error(): adapter = output_adapters.ClassificationAdapter(name="a") with pytest.raises(TypeError) as info: adapter.adapt(1) assert "Expect the target data of a to be" in str(info.value) def test_reg_head_transform_1d_np(): adapter = output_adapters.RegressionAdapter(name="a") y = adapter.adapt(np.random.rand(10)) assert isinstance(y, np.ndarray) ================================================ FILE: autokeras/analysers/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from autokeras.analysers.input_analysers import CATEGORICAL from autokeras.analysers.input_analysers import NUMERICAL from autokeras.analysers.input_analysers import ImageAnalyser from autokeras.analysers.input_analysers import InputAnalyser from autokeras.analysers.input_analysers import StructuredDataAnalyser from autokeras.analysers.input_analysers import TextAnalyser from autokeras.analysers.output_analysers import ClassificationAnalyser from autokeras.analysers.output_analysers import RegressionAnalyser ================================================ FILE: autokeras/analysers/input_analysers.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np from autokeras.engine import analyser CATEGORICAL = "categorical" NUMERICAL = "numerical" class InputAnalyser(analyser.Analyser): def finalize(self): return class ImageAnalyser(InputAnalyser): def __init__(self, **kwargs): super().__init__(**kwargs) def finalize(self): if len(self.shape) not in [3, 4]: raise ValueError( "Expect the data to ImageInput to have shape (batch_size, " "height, width, channels) or (batch_size, height, width) " "dimensions, but got input shape {shape}".format( shape=self.shape ) ) class TextAnalyser(InputAnalyser): def correct_shape(self): if len(self.shape) == 1: return True return len(self.shape) == 2 and self.shape[1] == 1 def finalize(self): if not self.correct_shape(): raise ValueError( "Expect the data to TextInput to have shape " "(batch_size, 1), but " "got input shape {shape}.".format(shape=self.shape) ) if not (self.dtype == "string"): raise TypeError( "Expect the data to TextInput to be strings, but got " "{type}.".format(type=self.dtype) ) class StructuredDataAnalyser(InputAnalyser): def __init__(self, column_names=None, column_types=None, **kwargs): super().__init__(**kwargs) self.column_names = column_names self.column_types = column_types # Variables for inferring column types. self.count_numerical = None self.count_categorical = None self.count_unique_numerical = [] self.num_col = None def update(self, data): super().update(data) # The super class set self.dtype to "string" based on the input numpy # array. However, the preprocessor will encode it to float32. So, set # self.dtype to "float32", which would be propagated to the Keras Input # node. self.dtype = "float32" if len(self.shape) != 2: return # data is a numpy array containing all the data. 
self.num_col = data.shape[1] self.count_numerical = np.zeros(self.num_col) self.count_categorical = np.zeros(self.num_col) self.count_unique_numerical = [{} for _ in range(self.num_col)] for i in range(self.num_col): self._update_column(data[:, i], i) def _update_column(self, column_data, i): # Vectorized check for numerical values def is_numerical(x): try: float(x) return True except (ValueError, TypeError): return False numerical_mask = np.vectorize(is_numerical)(column_data) self.count_numerical[i] = np.sum(numerical_mask) self.count_categorical[i] = len(column_data) - np.sum(numerical_mask) if np.any(numerical_mask): # Get numerical values numerical_values = column_data[numerical_mask] # Handle bytes numerical_values = np.array( [ x.decode("utf-8") if isinstance(x, bytes) else x for x in numerical_values ] ) numerical_floats = numerical_values.astype(float) unique_vals, counts = np.unique( numerical_floats, return_counts=True ) self.count_unique_numerical[i] = dict(zip(unique_vals, counts)) def finalize(self): self.check() self.infer_column_types() def get_input_name(self): return "StructuredDataInput" def check(self): if len(self.shape) != 2: raise ValueError( "Expect the data to {input_name} to have shape " "(batch_size, num_features), but " "got input shape {shape}.".format( input_name=self.get_input_name(), shape=self.shape ) ) # Fill in the column_names if self.column_names is None: if self.column_types: raise ValueError( "column_names must be specified, if " "column_types is specified." ) self.column_names = [str(index) for index in range(self.shape[1])] # Check if column_names has the correct length. 
if len(self.column_names) != self.shape[1]: raise ValueError( "Expect column_names to have length {expect} " "but got {actual}.".format( expect=self.shape[1], actual=len(self.column_names) ) ) def infer_column_types(self): column_types = {} for i in range(self.num_col): if self.count_categorical[i] > 0: column_types[self.column_names[i]] = CATEGORICAL elif ( len(self.count_unique_numerical[i]) / self.count_numerical[i] < 0.05 ): column_types[self.column_names[i]] = CATEGORICAL else: column_types[self.column_names[i]] = NUMERICAL # Partial column_types is provided. if self.column_types is None: self.column_types = {} for key, value in column_types.items(): if key not in self.column_types: self.column_types[key] = value ================================================ FILE: autokeras/analysers/input_analysers_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import copy

import numpy as np
import pandas as pd
import pytest

from autokeras import test_utils
from autokeras.analysers import input_analysers


def test_structured_data_input_less_col_name_error():
    """8 column names for 10 data columns make finalize() raise."""
    with pytest.raises(ValueError) as info:
        analyser = input_analysers.StructuredDataAnalyser(
            column_names=list(range(8))
        )
        dataset = np.random.rand(20, 10)
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect column_names to have length" in str(info.value)


def test_structured_data_infer_col_types():
    """Inferred column types of the training CSV match COLUMN_TYPES."""
    analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )
    x = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    # Drop the target column so only the features are analysed.
    x.pop("survived")
    dataset = x.values.astype(str)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.column_types == test_utils.COLUMN_TYPES


def test_dont_infer_specified_column_types():
    """A user-specified column type is kept, not replaced by inference."""
    column_types = copy.copy(test_utils.COLUMN_TYPES)
    column_types.pop("sex")
    column_types["age"] = "categorical"
    analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=column_types,
    )
    x = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    x.pop("survived")
    dataset = x.values.astype(str)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.column_types["age"] == "categorical"


def test_structured_data_input_with_illegal_dim():
    """3-D data makes StructuredDataAnalyser raise ValueError."""
    analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )
    dataset = np.random.rand(100, 32, 32)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the data to StructuredDataInput to have shape" in str(
        info.value
    )


def test_image_input_analyser_shape_is_list_of_int():
    """ImageAnalyser.shape is a list whose items are all ints."""
    analyser = input_analysers.ImageAnalyser()
    dataset = np.random.rand(100, 32, 32, 3)
    analyser.update(dataset)
    analyser.finalize()
    assert isinstance(analyser.shape, list)
    assert all(map(lambda x: isinstance(x, int), analyser.shape))


def test_image_input_with_three_dim():
    """3-D image data (no channel axis) is accepted by ImageAnalyser."""
    analyser = input_analysers.ImageAnalyser()
    dataset = np.random.rand(100, 32, 32)
    analyser.update(dataset)
    analyser.finalize()
    assert len(analyser.shape) == 3


def test_image_input_with_illegal_dim():
    """2-D data makes ImageAnalyser raise ValueError."""
    analyser = input_analysers.ImageAnalyser()
    dataset = np.random.rand(100, 32)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the data to ImageInput to have shape" in str(info.value)


def test_text_input_with_illegal_dim():
    """Data with shape (100, 32) makes TextAnalyser raise ValueError."""
    analyser = input_analysers.TextAnalyser()
    dataset = np.random.rand(100, 32)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the data to TextInput to have shape" in str(info.value)


def test_text_analyzer_with_one_dim_doesnt_crash():
    """A 1-D array of strings passes TextAnalyser without an error."""
    analyser = input_analysers.TextAnalyser()
    dataset = np.array(["a b c", "b b c"])
    analyser.update(dataset)
    analyser.finalize()


def test_text_illegal_type_error():
    """Float data of shape (100, 1) makes TextAnalyser raise TypeError."""
    analyser = input_analysers.TextAnalyser()
    dataset = np.random.rand(100, 1)
    with pytest.raises(TypeError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the data to TextInput to be strings" in str(info.value)


================================================
FILE: autokeras/analysers/output_analysers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np from autokeras.engine import analyser class TargetAnalyser(analyser.Analyser): def __init__(self, name=None, **kwargs): super().__init__(**kwargs) self.name = name class ClassificationAnalyser(TargetAnalyser): def __init__(self, num_classes=None, multi_label=False, **kwargs): super().__init__(**kwargs) self.num_classes = num_classes self.label_encoder = None self.multi_label = multi_label self.labels = set() def update(self, data): super().update(data) if len(self.shape) > 2: raise ValueError( "Expect the target data for {name} to have shape " "(batch_size, num_classes), " "but got {shape}.".format(name=self.name, shape=self.shape) ) if len(self.shape) > 1 and self.shape[1] > 1: return self.labels = self.labels.union(set(np.unique(data))) def finalize(self): # TODO: support raw string labels for multi-label. self.labels = sorted(list(self.labels)) # Infer the num_classes if not specified. if not self.num_classes: if self.encoded: # Single column with 0s and 1s. if len(self.shape) == 1 or self.shape[1:] == [1]: self.num_classes = 2 else: self.num_classes = self.shape[1] else: self.num_classes = len(self.labels) if self.num_classes < 2: raise ValueError( "Expect the target data for {name} to have " "at least 2 classes, but got {num_classes}.".format( name=self.name, num_classes=self.num_classes ) ) # Check shape equals expected shape. expected = self.get_expected_shape() actual = self.shape[1:] if len(actual) == 0: actual = [1] if self.encoded and actual != expected: raise ValueError( "Expect the target data for {name} to have " "shape {expected}, but got {actual}.".format( name=self.name, expected=expected, actual=self.shape[1:] ) ) def get_expected_shape(self): # Compute expected shape from num_classes. 
if self.num_classes == 2 and not self.multi_label: return [1] return [self.num_classes] @property def encoded(self): return self.encoded_for_sigmoid or self.encoded_for_softmax @property def encoded_for_sigmoid(self): if len(self.labels) != 2: return False return sorted(self.labels) == [0, 1] @property def encoded_for_softmax(self): return len(self.shape) > 1 and self.shape[1] > 1 class RegressionAnalyser(TargetAnalyser): def __init__(self, output_dim=None, **kwargs): super().__init__(**kwargs) self.output_dim = output_dim def finalize(self): if self.output_dim and (self.expected_dim() != self.output_dim): raise ValueError( "Expect the target data for {name} to have shape " "(batch_size, {output_dim}), " "but got {shape}.".format( name=self.name, output_dim=self.output_dim, shape=self.shape ) ) def expected_dim(self): if len(self.shape) == 1: return 1 return self.shape[1] ================================================ FILE: autokeras/analysers/output_analysers_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np
import pytest

from autokeras import test_utils
from autokeras.analysers import output_analysers


def test_clf_head_one_hot_shape_error():
    """10-class one-hot labels with num_classes=9 raise ValueError."""
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=9)
    dataset = test_utils.generate_one_hot_labels(num_classes=10)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the target data for a to have shape" in str(info.value)


def test_clf_head_more_dim_error():
    """4-D target data raises ValueError."""
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=9)
    dataset = np.random.rand(100, 32, 32, 3)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the target data for a to have shape" in str(info.value)


def test_wrong_num_classes_error():
    """3-column targets with num_classes=5 raise ValueError."""
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=5)
    dataset = np.random.rand(10, 3)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the target data for a to have shape" in str(info.value)


def test_one_class_error():
    """Targets containing a single distinct label raise ValueError."""
    analyser = output_analysers.ClassificationAnalyser(name="a")
    dataset = np.array(["a", "a", "a"])
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the target data for a to have at least 2 classes" in str(
        info.value
    )


def test_infer_ten_classes():
    """num_classes is inferred as 10 from 10-class one-hot labels."""
    analyser = output_analysers.ClassificationAnalyser(name="a")
    dataset = test_utils.generate_one_hot_labels(num_classes=10)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.num_classes == 10


def test_infer_single_column_two_classes():
    """num_classes is inferred as 2 from a 1-D array of 0/1 labels."""
    analyser = output_analysers.ClassificationAnalyser(name="a")
    # NOTE(review): randint could in principle draw ten identical values,
    # in which case finalize() would raise -- confirm this is acceptable.
    dataset = np.random.randint(0, 2, 10)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.num_classes == 2


def test_specify_five_classes():
    """A specified num_classes=5 is kept for 5-column targets."""
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=5)
    dataset = np.random.rand(10, 5)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.num_classes == 5


def test_specify_two_classes_fit_single_column():
    """A specified num_classes=2 is kept for single-column targets."""
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=2)
    dataset = np.random.rand(10, 1)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.num_classes == 2


def test_multi_label_two_classes_has_two_columns():
    """Two-column multi-label targets are reported as encoded."""
    analyser = output_analysers.ClassificationAnalyser(
        name="a", multi_label=True
    )
    dataset = np.random.rand(10, 2)
    analyser.update(dataset)
    analyser.finalize()
    assert analyser.encoded


def test_reg_with_specified_output_dim_error():
    """output_dim=3 with 2-column targets raises ValueError."""
    analyser = output_analysers.RegressionAnalyser(name="a", output_dim=3)
    dataset = np.random.rand(10, 2)
    with pytest.raises(ValueError) as info:
        analyser.update(dataset)
        analyser.finalize()
    assert "Expect the target data for a to have shape" in str(info.value)


def test_reg_with_specified_output_dim_and_single_column_doesnt_crash():
    """output_dim=1 with single-column targets passes without an error."""
    analyser = output_analysers.RegressionAnalyser(name="a", output_dim=1)
    dataset = np.random.rand(10, 1)
    analyser.update(dataset)
    analyser.finalize()


def test_regression_analyser_expected_dim_1d():
    """expected_dim() is 1 when the shape is 1-dimensional."""
    analyser = output_analysers.RegressionAnalyser()
    analyser.shape = [10]  # 1D shape
    assert analyser.expected_dim() == 1


def test_regression_analyser_expected_dim_2d():
    """expected_dim() is the second axis size for a 2-D shape."""
    analyser = output_analysers.RegressionAnalyser()
    analyser.shape = [10, 3]  # 2D shape
    assert analyser.expected_dim() == 3


================================================
FILE: autokeras/auto_model.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# Maps the user-facing tuner name to the tuner class implementing it.
TUNER_CLASSES = {
    "bayesian": tuners.BayesianOptimization,
    "random": tuners.RandomSearch,
    "hyperband": tuners.Hyperband,
    "greedy": tuners.Greedy,
}


def get_tuner_class(tuner):
    """Look up a tuner class by its string name.

    # Arguments
        tuner: String. One of the keys of `TUNER_CLASSES`.

    # Returns
        The tuner class registered under `tuner`.

    # Raises
        ValueError: If `tuner` is not a string or is not a known name.
    """
    if isinstance(tuner, str) and tuner in TUNER_CLASSES:
        return TUNER_CLASSES.get(tuner)
    else:
        raise ValueError(
            'Expected the tuner argument to be one of "greedy", '
            '"random", "hyperband", or "bayesian", '
            "but got {tuner}".format(tuner=tuner)
        )


class AutoModel(object):
    """A Model defined by inputs and outputs.

    AutoModel combines a HyperModel and a Tuner to tune the HyperModel.
    The user can use it in a similar way to a Keras model since it
    also has `fit()` and `predict()` methods.

    The AutoModel has two use cases. In the first case, the user only
    specifies the input nodes and output heads of the AutoModel. The
    AutoModel infers the rest part of the model. In the second case, user
    can specify the high-level architecture of the AutoModel by connecting
    the Blocks with the functional API, which is the same as the Keras
    [functional API](https://keras.io/api/models/model/#with-the-functional-api).

    # Example
    ```python
        # The user only specifies the input nodes and output heads.
        import autokeras as ak
        ak.AutoModel(
            inputs=[ak.ImageInput(), ak.TextInput()],
            outputs=[ak.ClassificationHead(), ak.RegressionHead()]
        )
    ```
    ```python
        # The user specifies the high-level architecture.
        import autokeras as ak
        image_input = ak.ImageInput()
        image_output = ak.ImageBlock()(image_input)
        text_input = ak.TextInput()
        text_output = ak.TextBlock()(text_input)
        output = ak.Merge()([image_output, text_output])
        classification_output = ak.ClassificationHead()(output)
        regression_output = ak.RegressionHead()(output)
        ak.AutoModel(
            inputs=[image_input, text_input],
            outputs=[classification_output, regression_output]
        )
    ```

    # Arguments
        inputs: A list of Node instances.
            The input node(s) of the AutoModel.
        outputs: A list of Node or Head instances.
            The output node(s) or head(s) of the AutoModel.
        project_name: String. The name of the AutoModel. Defaults to
            'auto_model'.
        max_trials: Int. The maximum number of different Keras Models to
            try. The search may finish before reaching the max_trials.
            Defaults to 100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one
            of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be
            a subclass of AutoTuner. Defaults to 'greedy'.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an
            existing project of the same name if one is found. Otherwise,
            overwrites the project.
        seed: Int. Random seed. Any integer, including 0, seeds NumPy;
            `None` leaves the random state untouched.
        max_model_size: Int. Maximum number of scalars in the parameters of
            a model. Models larger than this are rejected.
        **kwargs: Any arguments supported by keras_tuner.Tuner.
    """

    def __init__(
        self,
        inputs: Union[Input, List[Input]],
        outputs: Union[head_module.Head, node_module.Node, list],
        project_name: str = "auto_model",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = "greedy",
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        self.inputs = tree.flatten(inputs)
        self.outputs = tree.flatten(outputs)
        self.seed = seed
        # Compare against None rather than relying on truthiness: 0 is a
        # valid seed and must not be skipped.
        if seed is not None:
            np.random.seed(seed)
        # TODO: Support passing a tuner instance.
        # Initialize the hyper_graph.
        graph = self._build_graph()
        if isinstance(tuner, str):
            tuner = get_tuner_class(tuner)
        self.tuner = tuner(
            hypermodel=graph,
            overwrite=overwrite,
            objective=objective,
            max_trials=max_trials,
            directory=directory,
            seed=self.seed,
            project_name=project_name,
            max_model_size=max_model_size,
            **kwargs
        )
        self.overwrite = overwrite
        # Each output node is produced by a head; keep the heads for data
        # analysis and adaptation of the targets.
        self._heads = [output_node.in_blocks[0] for output_node in self.outputs]

    @property
    def objective(self):
        """The objective held by the underlying tuner."""
        return self.tuner.objective

    @property
    def max_trials(self):
        """The max_trials value held by the underlying tuner."""
        return self.tuner.max_trials

    @property
    def directory(self):
        """The directory held by the underlying tuner."""
        return self.tuner.directory

    @property
    def project_name(self):
        """The project name held by the underlying tuner."""
        return self.tuner.project_name

    def _assemble(self):
        """Assemble the Blocks based on the input output nodes."""
        inputs = tree.flatten(self.inputs)
        outputs = tree.flatten(self.outputs)

        middle_nodes = [
            input_node.get_block()(input_node) for input_node in inputs
        ]

        # Merge the middle nodes.
        if len(middle_nodes) > 1:
            output_node = blocks.Merge()(middle_nodes)
        else:
            output_node = middle_nodes[0]

        outputs = tree.flatten(
            [output_blocks(output_node) for output_blocks in outputs]
        )
        return graph_module.Graph(inputs=inputs, outputs=outputs)

    def _build_graph(self):
        """Build the Graph from either the functional or the IO API.

        # Returns
            The graph_module.Graph connecting `self.inputs` to the outputs.

        # Raises
            ValueError: If the outputs are neither all Node instances nor
                all Head instances.
        """
        # Using functional API.
        if all(isinstance(output, node_module.Node) for output in self.outputs):
            graph = graph_module.Graph(inputs=self.inputs, outputs=self.outputs)
        # Using input/output API.
        elif all(
            isinstance(output, head_module.Head) for output in self.outputs
        ):
            # Clear session to reset get_uid(). The names of the blocks will
            # start to count from 1 for new blocks in a new AutoModel
            # afterwards. When initializing multiple AutoModel with Task API,
            # if not counting from 1 for each of the AutoModel, the predefined
            # hp values in task specific tuners would not match the names.
            keras.backend.clear_session()
            graph = self._assemble()
            self.outputs = graph.outputs
            keras.backend.clear_session()
        else:
            # Without this branch `graph` would be unbound below and the user
            # would only see an UnboundLocalError.
            raise ValueError(
                "Expected the outputs to be either all Node instances or "
                "all Head instances, but got {types}".format(
                    types=[type(output) for output in self.outputs]
                )
            )

        return graph

    def fit(
        self,
        x=None,
        y=None,
        batch_size=32,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        verbose=1,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x.
            y: numpy.ndarray. Training data y.
            batch_size: Int. Number of samples per gradient update. Defaults
                to 32.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, by default we train for a maximum of
                1000 epochs, but we stop training if the validation loss
                stops improving for 10 epochs (unless you specified an
                EarlyStopping callback as part of the callbacks argument, in
                which case the EarlyStopping callback you specified will
                determine early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any
                model metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the `x`
                and `y` data provided, before shuffling. This argument is not
                supported when `x` is a dataset. The best model found would
                be fit on the entire dataset including the validation data.
            validation_data: Data on which to evaluate the loss and any
                model metrics at the end of each epoch. The model will not be
                trained on this data. `validation_data` will override
                `validation_split`. The type of the validation data should be
                the same as the training data. The best model found would be
                fit on the training dataset without the validation data.
            verbose: 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress
                bar, 2 = one line per epoch. Note that the progress bar is
                not particularly useful when logged to a file, so verbose=2
                is recommended when not running interactively (eg, in a
                production environment). Controls the verbosity of both
                KerasTuner search and
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method)
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well
                as validation loss values and validation metrics values (if
                applicable).

        # Raises
            ValueError: If neither `validation_data` nor a non-zero
                `validation_split` is given, or if the data fails the format
                checks.
        """
        # Check validation information.
        if not validation_data and not validation_split:
            raise ValueError(
                "Either validation_data or a non-zero validation_split "
                "should be provided."
            )

        # Explicit validation data takes precedence over the split ratio.
        if validation_data:
            validation_split = 0

        dataset, validation_data = self._check_and_adapt(
            x=x, y=y, validation_data=validation_data
        )
        self._analyze_data(dataset)
        self._build_hyper_pipeline()

        # Split the data with validation_split.
        if validation_data is None and validation_split:
            dataset, validation_data = data_utils.split_dataset(
                dataset, validation_split
            )

        x, y = dataset
        history = self.tuner.search(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_split=validation_split,
            verbose=verbose,
            batch_size=batch_size,
            **kwargs
        )

        return history

    def _adapt(self, dataset, hms):
        """Run each flattened source through its hypermodel's adapter.

        Sources are paired positionally with `hms`. A single adapted source
        is returned bare; several are returned as a tuple.
        """
        sources = tree.flatten(dataset)
        adapted = []
        for source, hm in zip(sources, hms):
            source = hm.get_adapter().adapt(source)
            adapted.append(source)
        if len(adapted) == 1:
            return adapted[0]
        return tuple(adapted)

    def _check_numpy_arrays(self, data, name, in_val=""):
        """Check if all elements in the nested structure are numpy arrays."""
        # Flatten once and reuse the result for both the check and the
        # error message.
        arrays = tree.flatten(data)
        if not all(isinstance(a, np.ndarray) for a in arrays):
            raise ValueError(
                "Expected "
                "{name}{in_val} to be a numpy array, got {type}".format(
                    name=name,
                    in_val=in_val,
                    type=[type(a) for a in arrays],
                )
            )

    def _check_array_count(self, actual, expected, name, in_val):
        """Check if the number of arrays matches the expected count."""
        if actual != expected:
            raise ValueError(
                "Expected {name}{in_val} to have {expected} arrays, "
                "but got {actual}".format(
                    name=name,
                    in_val=in_val,
                    expected=expected,
                    actual=actual,
                )
            )

    def _check_data_format(self, x, y, validation=False, predict=False):
        """Check if the dataset has the same number of IOs with the model."""
        if validation:
            in_val = " in validation_data"
        else:
            in_val = ""

        self._check_numpy_arrays(x, "x", in_val)
        if y is not None:
            self._check_numpy_arrays(y, "y", in_val)

        self._check_array_count(
            len(tree.flatten(x)), len(self.inputs), "x", in_val
        )
        # When predicting, y is not required.
        if not predict and y is not None:
            self._check_array_count(
                len(tree.flatten(y)), len(self.outputs), "y", in_val
            )

    def _analyze_data(self, dataset):
        """Feed the data to the analysers and configure inputs and heads."""
        input_analysers = [node.get_analyser() for node in self.inputs]
        output_analysers = [head.get_analyser() for head in self._heads]
        analysers = input_analysers + output_analysers
        np_arrays = tree.flatten(dataset)
        for array, analyser in zip(np_arrays, analysers):
            analyser.update(array)

        for analyser in analysers:
            analyser.finalize()

        for hm, analyser in zip(self.inputs + self._heads, analysers):
            hm.config_from_analyser(analyser)

    def _build_hyper_pipeline(self):
        """Create the HyperPipeline and attach it to tuner and hypermodel."""
        self.tuner.hyper_pipeline = pipeline.HyperPipeline(
            inputs=[node.get_hyper_preprocessors() for node in self.inputs],
            outputs=[head.get_hyper_preprocessors() for head in self._heads],
        )
        self.tuner.hypermodel.hyper_pipeline = self.tuner.hyper_pipeline

    def _check_and_adapt(self, x, y, validation_data):
        """Validate and adapt the training and optional validation data.

        # Returns
            A pair `((x, y), validation_data)` where `validation_data` is
            the adapted `(x_val, y_val)` tuple, or the value passed in when
            it is falsy.
        """
        # Convert training data.
        self._check_data_format(x, y)
        x = self._adapt(x, self.inputs)
        y = self._adapt(y, self._heads)

        # Convert validation data
        if validation_data:
            self._check_data_format(*validation_data, validation=True)
            x_val, y_val = validation_data
            x_val = self._adapt(x_val, self.inputs)
            y_val = self._adapt(y_val, self._heads)
            validation_data = (x_val, y_val)

        return (x, y), validation_data

    def predict(self, x, batch_size=32, verbose=1, **kwargs):
        """Predict the output for a given testing data.

        # Arguments
            x: Any allowed types according to the input node. Testing data.
            batch_size: Number of samples per batch.
                If unspecified, batch_size will default to 32.
            verbose: Verbosity mode. 0 = silent, 1 = progress bar.
                Controls the verbosity of
                [keras.Model.predict](https://keras.io/api/models/model_training_apis/#predict-method)
            **kwargs: Any arguments supported by keras.Model.predict.

        # Returns
            A list of numpy.ndarray objects or a single numpy.ndarray.
            The predicted results.
        """
        self._check_data_format(x, None, predict=True)
        dataset = self._adapt(x, self.inputs)
        # Named `best_pipeline` so the imported `pipeline` module is not
        # shadowed inside this method.
        best_pipeline = self.tuner.get_best_pipeline()
        model = self.tuner.get_best_model()
        dataset = best_pipeline.transform_x(dataset)
        y = utils.predict_with_adaptive_batch_size(
            model=model,
            batch_size=batch_size,
            x=dataset,
            verbose=verbose,
            **kwargs
        )
        return best_pipeline.postprocess(y)

    def evaluate(self, x, y=None, batch_size=32, verbose=1, **kwargs):
        """Evaluate the best model for the given data.

        # Arguments
            x: Any allowed types according to the input node. Testing data.
            y: Any allowed types according to the head. Testing targets.
                Defaults to None.
            batch_size: Number of samples per batch.
                If unspecified, batch_size will default to 32.
            verbose: Verbosity mode. 0 = silent, 1 = progress bar.
                Controls the verbosity of
                [keras.Model.evaluate](https://keras.io/api/models/model_training_apis/#evaluate-method)
            **kwargs: Any arguments supported by keras.Model.evaluate.

        # Returns
            Scalar test loss (if the model has a single output and no
            metrics) or list of scalars (if the model has multiple outputs
            and/or metrics). The attribute model.metrics_names will give you
            the display labels for the scalar outputs.
        """
        self._check_data_format(x, y)
        x = self._adapt(x, self.inputs)
        y = self._adapt(y, self._heads)
        best_pipeline = self.tuner.get_best_pipeline()
        x, y = best_pipeline.transform((x, y))
        model = self.tuner.get_best_model()
        return utils.evaluate_with_adaptive_batch_size(
            model=model,
            batch_size=batch_size,
            x=x,
            y=y,
            verbose=verbose,
            **kwargs
        )

    def export_model(self):
        """Export the best Keras Model.

        # Returns
            keras.Model instance. The best model found during the search,
            loaded with trained weights.
        """
        return self.tuner.get_best_model()
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from unittest import mock import keras_tuner import numpy as np import pytest import autokeras as ak from autokeras import test_utils def get_tuner_class(*args, **kwargs): pipeline = mock.Mock() pipeline.transform_x.side_effect = lambda x: x tuner = mock.Mock() tuner.get_best_pipeline.return_value = pipeline tuner_class = mock.Mock() tuner_class.return_value = tuner return tuner_class def test_auto_model_objective_is_kt_objective(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) assert isinstance(auto_model.objective, keras_tuner.Objective) def test_auto_model_max_trial_field_as_specified(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, max_trials=10 ) assert auto_model.max_trials == 10 def test_auto_model_directory_field_as_specified(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) assert auto_model.directory == tmp_path def test_auto_model_project_name_field_as_specified(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, project_name="auto_model", ) assert auto_model.project_name == "auto_model" @mock.patch("autokeras.auto_model.get_tuner_class") def test_evaluate(tuner_fn, tmp_path): x_train = np.random.rand(100, 32) y_train = np.random.rand(100, 1) input_node = ak.Input() output_node = input_node output_node = ak.DenseBlock()(output_node) 
output_node = ak.RegressionHead()(output_node) auto_model = ak.AutoModel( input_node, output_node, directory=tmp_path, max_trials=1 ) auto_model.fit( x_train, y_train, epochs=1, validation_data=(x_train, y_train) ) pipeline = tuner_fn.return_value.return_value.get_best_pipeline.return_value pipeline.transform.return_value = (x_train, y_train) auto_model.evaluate(x_train, y_train) assert tuner_fn.called def get_single_io_auto_model(tmp_path): return ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, max_trials=2 ) @mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class) def test_auto_model_predict(tuner_fn, tmp_path): x_train = np.random.rand(100, 32, 32, 3) y_train = np.random.rand(100, 1) auto_model = get_single_io_auto_model(tmp_path) auto_model.fit(x_train, y_train, epochs=2, validation_split=0.2) auto_model.predict(x_train) assert tuner_fn.called @mock.patch("autokeras.auto_model.get_tuner_class") def test_final_fit_concat(tuner_fn, tmp_path): tuner = tuner_fn.return_value.return_value x_train = np.random.rand(100, 32, 32, 3) y_train = np.random.rand(100, 1) auto_model = get_single_io_auto_model(tmp_path) auto_model.fit(x_train, y_train, epochs=2, validation_split=0.2) assert tuner.search.call_args_list[0][1]["validation_split"] @mock.patch("autokeras.auto_model.get_tuner_class") def test_final_fit_not_concat(tuner_fn, tmp_path): tuner = tuner_fn.return_value.return_value x_train = np.random.rand(100, 32, 32, 3) y_train = np.random.rand(100, 1) auto_model = get_single_io_auto_model(tmp_path) auto_model.fit( x_train, y_train, epochs=2, validation_data=(x_train, y_train) ) assert not tuner.search.call_args_list[0][1]["validation_split"] @mock.patch("autokeras.auto_model.get_tuner_class") def test_overwrite(tuner_fn, tmp_path): tuner_class = tuner_fn.return_value x_train = np.random.rand(100, 32, 32, 3) y_train = np.random.rand(100, 1) auto_model = get_single_io_auto_model(tmp_path) auto_model.fit( x_train, y_train, 
epochs=2, validation_data=(x_train, y_train) ) assert not tuner_class.call_args_list[0][1]["overwrite"] @mock.patch("autokeras.auto_model.get_tuner_class") def test_export_model(tuner_fn, tmp_path): tuner_class = tuner_fn.return_value tuner = tuner_class.return_value x_train = np.random.rand(100, 32, 32, 3) y_train = np.random.rand(100, 1) auto_model = get_single_io_auto_model(tmp_path) auto_model.fit( x_train, y_train, epochs=2, validation_data=(x_train, y_train) ) auto_model.export_model() assert tuner.get_best_model.called def get_multi_io_auto_model(tmp_path): return ak.AutoModel( [ak.ImageInput(), ak.ImageInput()], [ak.RegressionHead(), ak.RegressionHead()], directory=tmp_path, max_trials=2, overwrite=False, ) def dataset_error(x, y, validation_data, message, tmp_path): auto_model = get_multi_io_auto_model(tmp_path) with pytest.raises(ValueError) as info: auto_model.fit(x, y, epochs=2, validation_data=validation_data) assert message in str(info.value) @mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class) def test_multi_input_predict(tuner_fn, tmp_path): auto_model = get_multi_io_auto_model(tmp_path) x1 = test_utils.generate_data() y1 = test_utils.generate_data(shape=(1,)) auto_model.fit( x=(x1, x1), y=(y1, y1), epochs=2, validation_data=((x1, x1), (y1, y1)) ) auto_model.predict(x=((x1, x1),)) @mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class) def test_multi_input_predict2(tuner_fn, tmp_path): auto_model = get_multi_io_auto_model(tmp_path) x1 = test_utils.generate_data() y1 = test_utils.generate_data(shape=(1,)) auto_model.fit( x=(x1, x1), y=(y1, y1), epochs=2, validation_data=((x1, x1), (y1, y1)) ) auto_model.predict(x=(x1, x1)) def test_invalid_tuner_name_error(tmp_path): with pytest.raises(ValueError) as info: ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, tuner="unknown", ) assert "Expected the tuner argument to be one of" in str(info.value) def 
test_no_validation_data_nor_split_error(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) with pytest.raises(ValueError) as info: auto_model.fit( x=np.random.rand(100, 32, 32, 3), y=np.random.rand(100, 1), validation_split=0, ) assert "Either validation_data or a non-zero" in str(info.value) @mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class) def test_predict_tuple_x_and_tuple_y_predict_doesnt_crash(tuner_fn, tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) x, y = (np.random.rand(100, 32, 32, 3),), (np.random.rand(100, 1),) auto_model.fit(x, y) auto_model.predict(x) def test_fit_with_non_numpy_data_raises_error(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) x = [[[1, 2, 3]] * 32] * 32 # list, not np.ndarray y = [1] match_str = "Expected x to be a numpy array" with pytest.raises(ValueError, match=match_str): auto_model.fit(x, y) def test_fit_with_wrong_array_count_raises_error(tmp_path): auto_model = ak.AutoModel( ak.ImageInput(), ak.RegressionHead(), directory=tmp_path ) x = ( np.random.rand(10, 32, 32, 3), np.random.rand(10, 32, 32, 3), ) # 2 arrays y = np.random.rand(10, 1) match_str = "Expected x to have 1 arrays, but got 2" with pytest.raises(ValueError, match=match_str): auto_model.fit(x, y) ================================================ FILE: autokeras/blocks/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
def serialize(obj):
    """Serialize `obj` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild an object from `config`.

    Names are resolved against this module's globals, plus any
    `custom_objects` supplied by the caller.
    """
    # globals() must be evaluated here so it refers to this module's
    # namespace.
    known_objects = globals()
    return utils.deserialize_keras_object(
        config, module_objects=known_objects, custom_objects=custom_objects
    )
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from typing import Optional from typing import Union import keras import tree from keras import applications from keras import layers from keras_tuner.engine import hyperparameters from autokeras.blocks import reduction from autokeras.engine import block as block_module from autokeras.utils import io_utils from autokeras.utils import layer_utils from autokeras.utils import utils RESNET_V1 = { "resnet50": applications.ResNet50, "resnet101": applications.ResNet101, "resnet152": applications.ResNet152, } RESNET_V2 = { "resnet50_v2": applications.ResNet50V2, "resnet101_v2": applications.ResNet101V2, "resnet152_v2": applications.ResNet152V2, } EFFICIENT_VERSIONS = { "b0": applications.EfficientNetB0, "b1": applications.EfficientNetB1, "b2": applications.EfficientNetB2, "b3": applications.EfficientNetB3, "b4": applications.EfficientNetB4, "b5": applications.EfficientNetB5, "b6": applications.EfficientNetB6, "b7": applications.EfficientNetB7, } PRETRAINED = "pretrained" @keras.utils.register_keras_serializable(package="autokeras") class DenseBlock(block_module.Block): """Block for Dense layers. # Arguments num_layers: Int or keras_tuner.engine.hyperparameters.Choice. The number of Dense layers in the block. If left unspecified, it will be tuned automatically. num_units: Int or keras_tuner.engine.hyperparameters.Choice. The number of units in each dense layer. If left unspecified, it will be tuned automatically. use_bn: Boolean. Whether to use BatchNormalization layers. If left unspecified, it will be tuned automatically. 
dropout: Float or keras_tuner.engine.hyperparameters.Choice. The dropout rate for the layers. If left unspecified, it will be tuned automatically. """ def __init__( self, num_layers: Optional[Union[int, hyperparameters.Choice]] = None, num_units: Optional[Union[int, hyperparameters.Choice]] = None, use_batchnorm: Optional[bool] = None, dropout: Optional[Union[float, hyperparameters.Choice]] = None, **kwargs, ): super().__init__(**kwargs) self.num_layers = utils.get_hyperparameter( num_layers, hyperparameters.Choice("num_layers", [1, 2, 3], default=2), int, ) self.num_units = utils.get_hyperparameter( num_units, hyperparameters.Choice( "num_units", [16, 32, 64, 128, 256, 512, 1024], default=32 ), int, ) self.use_batchnorm = use_batchnorm self.dropout = utils.get_hyperparameter( dropout, hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0), float, ) def get_config(self): config = super().get_config() config.update( { "num_layers": io_utils.serialize_block_arg(self.num_layers), "num_units": io_utils.serialize_block_arg(self.num_units), "use_batchnorm": self.use_batchnorm, "dropout": io_utils.serialize_block_arg(self.dropout), } ) return config @classmethod def from_config(cls, config): config["num_layers"] = io_utils.deserialize_block_arg( config["num_layers"] ) config["num_units"] = io_utils.deserialize_block_arg( config["num_units"] ) config["dropout"] = io_utils.deserialize_block_arg(config["dropout"]) return cls(**config) def build(self, hp, inputs=None): inputs = tree.flatten(inputs) utils.validate_num_inputs(inputs, 1) input_node = inputs[0] output_node = input_node output_node = reduction.Flatten().build(hp, output_node) use_batchnorm = self.use_batchnorm if use_batchnorm is None: use_batchnorm = hp.Boolean("use_batchnorm", default=False) for i in range(utils.add_to_hp(self.num_layers, hp)): units = utils.add_to_hp(self.num_units, hp, "units_{i}".format(i=i)) output_node = layers.Dense(units)(output_node) if use_batchnorm: output_node = 
layers.BatchNormalization()(output_node) output_node = layers.ReLU()(output_node) if utils.add_to_hp(self.dropout, hp) > 0: output_node = layers.Dropout(utils.add_to_hp(self.dropout, hp))( output_node ) return output_node @keras.utils.register_keras_serializable(package="autokeras") class RNNBlock(block_module.Block): """An RNN Block. # Arguments return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. Defaults to False. bidirectional: Boolean or keras_tuner.engine.hyperparameters.Boolean. Bidirectional RNN. If left unspecified, it will be tuned automatically. num_layers: Int or keras_tuner.engine.hyperparameters.Choice. The number of layers in RNN. If left unspecified, it will be tuned automatically. layer_type: String or or keras_tuner.engine.hyperparameters.Choice. 'gru' or 'lstm'. If left unspecified, it will be tuned automatically. """ def __init__( self, return_sequences: bool = False, bidirectional: Optional[Union[bool, hyperparameters.Boolean]] = None, num_layers: Optional[Union[int, hyperparameters.Choice]] = None, layer_type: Optional[Union[str, hyperparameters.Choice]] = None, **kwargs, ): super().__init__(**kwargs) self.return_sequences = return_sequences self.bidirectional = utils.get_hyperparameter( bidirectional, hyperparameters.Boolean("bidirectional", default=True), bool, ) self.num_layers = utils.get_hyperparameter( num_layers, hyperparameters.Choice("num_layers", [1, 2, 3], default=2), int, ) self.layer_type = utils.get_hyperparameter( layer_type, hyperparameters.Choice( "layer_type", ["gru", "lstm"], default="lstm" ), str, ) def get_config(self): config = super().get_config() config.update( { "return_sequences": self.return_sequences, "bidirectional": io_utils.serialize_block_arg( self.bidirectional ), "num_layers": io_utils.serialize_block_arg(self.num_layers), "layer_type": io_utils.serialize_block_arg(self.layer_type), } ) return config @classmethod def from_config(cls, config): 
config["bidirectional"] = io_utils.deserialize_block_arg( config["bidirectional"] ) config["num_layers"] = io_utils.deserialize_block_arg( config["num_layers"] ) config["layer_type"] = io_utils.deserialize_block_arg( config["layer_type"] ) return cls(**config) def build(self, hp, inputs=None): inputs = tree.flatten(inputs) utils.validate_num_inputs(inputs, 1) input_node = inputs[0] shape = list(input_node.shape) if len(shape) != 3: raise ValueError( "Expect the input tensor of RNNBlock to have dimensions of " "[batch_size, time_steps, vec_len], " "but got {shape}".format(shape=input_node.shape) ) feature_size = shape[-1] output_node = input_node bidirectional = utils.add_to_hp(self.bidirectional, hp) layer_type = utils.add_to_hp(self.layer_type, hp) num_layers = utils.add_to_hp(self.num_layers, hp) rnn_layers = {"gru": layers.GRU, "lstm": layers.LSTM} in_layer = rnn_layers[layer_type] for i in range(num_layers): return_sequences = True if i == num_layers - 1: return_sequences = self.return_sequences if bidirectional: output_node = layers.Bidirectional( # pragma: no cover in_layer(feature_size, return_sequences=return_sequences) )(output_node) else: output_node = in_layer( feature_size, return_sequences=return_sequences )(output_node) return output_node @keras.utils.register_keras_serializable(package="autokeras") class ConvBlock(block_module.Block): """Block for vanilla ConvNets. # Arguments kernel_size: Int or keras_tuner.engine.hyperparameters.Choice. The size of the kernel. If left unspecified, it will be tuned automatically. num_blocks: Int or keras_tuner.engine.hyperparameters.Choice. The number of conv blocks, each of which may contain convolutional, max pooling, dropout, and activation. If left unspecified, it will be tuned automatically. num_layers: Int or hyperparameters.Choice. The number of convolutional layers in each block. If left unspecified, it will be tuned automatically. filters: Int or keras_tuner.engine.hyperparameters.Choice. 
The number of filters in the convolutional layers. If left unspecified, it will be tuned automatically. max_pooling: Boolean. Whether to use max pooling layer in each block. If left unspecified, it will be tuned automatically. separable: Boolean. Whether to use separable conv layers. If left unspecified, it will be tuned automatically. dropout: Float or kerastuner.engine.hyperparameters. Choice range Between 0 and 1. The dropout rate after convolutional layers. If left unspecified, it will be tuned automatically. """ def __init__( self, kernel_size: Optional[Union[int, hyperparameters.Choice]] = None, num_blocks: Optional[Union[int, hyperparameters.Choice]] = None, num_layers: Optional[Union[int, hyperparameters.Choice]] = None, filters: Optional[Union[int, hyperparameters.Choice]] = None, max_pooling: Optional[bool] = None, separable: Optional[bool] = None, dropout: Optional[Union[float, hyperparameters.Choice]] = None, **kwargs, ): super().__init__(**kwargs) self.kernel_size = utils.get_hyperparameter( kernel_size, hyperparameters.Choice("kernel_size", [3, 5, 7], default=3), int, ) self.num_blocks = utils.get_hyperparameter( num_blocks, hyperparameters.Choice("num_blocks", [1, 2, 3], default=2), int, ) self.num_layers = utils.get_hyperparameter( num_layers, hyperparameters.Choice("num_layers", [1, 2], default=2), int, ) self.filters = utils.get_hyperparameter( filters, hyperparameters.Choice( "filters", [16, 32, 64, 128, 256, 512], default=32 ), int, ) self.max_pooling = max_pooling self.separable = separable self.dropout = utils.get_hyperparameter( dropout, hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0), float, ) def get_config(self): config = super().get_config() config.update( { "kernel_size": io_utils.serialize_block_arg(self.kernel_size), "num_blocks": io_utils.serialize_block_arg(self.num_blocks), "num_layers": io_utils.serialize_block_arg(self.num_layers), "filters": io_utils.serialize_block_arg(self.filters), "max_pooling": 
def build(self, hp, inputs=None):
    """Build the stack of convolutional blocks on a single input tensor.

    Each block is `num_layers` convolutions, optionally followed by max
    pooling and by dropout.

    # Arguments
        hp: HyperParameters. Used to resolve every tunable setting.
        inputs: A (possibly nested) structure holding exactly one tensor.

    # Returns
        The output tensor of the last block.
    """
    flat_inputs = tree.flatten(inputs)
    utils.validate_num_inputs(flat_inputs, 1)
    input_node = flat_inputs[0]

    kernel_size = utils.add_to_hp(self.kernel_size, hp)

    use_separable = self.separable
    if use_separable is None:
        use_separable = hp.Boolean("separable", default=False)
    if use_separable:
        conv = layer_utils.get_sep_conv(
            input_node.shape
        )  # pragma: no cover
    else:
        conv = layer_utils.get_conv(input_node.shape)

    use_pooling = self.max_pooling
    if use_pooling is None:
        use_pooling = hp.Boolean("max_pooling", default=True)
    pool = layer_utils.get_max_pooling(input_node.shape)

    output_node = input_node
    block_count = utils.add_to_hp(self.num_blocks, hp)
    for block_index in range(block_count):
        layer_count = utils.add_to_hp(self.num_layers, hp)
        for layer_index in range(layer_count):
            # Each convolution gets its own `filters_<i>_<j>` entry.
            filters = utils.add_to_hp(
                self.filters,
                hp,
                "filters_{i}_{j}".format(i=block_index, j=layer_index),
            )
            padding = self._get_padding(kernel_size, output_node)
            conv_layer = conv(
                filters,
                kernel_size,
                padding=padding,
                activation="relu",
            )
            output_node = conv_layer(output_node)
        if use_pooling:
            pool_size = kernel_size - 1
            pool_layer = pool(
                pool_size,
                padding=self._get_padding(pool_size, output_node),
            )
            output_node = pool_layer(output_node)
        if utils.add_to_hp(self.dropout, hp) > 0:
            output_node = layers.Dropout(utils.add_to_hp(self.dropout, hp))(
                output_node
            )
    return output_node
def build(self, hp, inputs=None):
    """Build one of the configured Keras application models on the input.

    The first flattened input is indexed as a rank-4 tensor (`shape[1]`,
    `shape[2]` and `shape[3]` are read as the two spatial sizes and the
    channel count).

    # Arguments
        hp: HyperParameters. Used for `pretrained` (when unspecified),
            `trainable`, `version` (when several models are configured)
            and `imagenet_size`.
        inputs: A (possibly nested) structure of tensors; only the first
            flattened tensor is used.

    # Returns
        The output tensor of the selected application model.

    # Raises
        ValueError: If `pretrained` was explicitly set to True and the
            input does not have 1 or 3 channels.
    """
    input_node = tree.flatten(inputs)[0]

    pretrained = self.pretrained
    if input_node.shape[3] not in [1, 3]:
        if self.pretrained:
            raise ValueError(
                "When pretrained is set to True, expect input to "
                "have 1 or 3 channels, but got "
                "{channels}.".format(channels=input_node.shape[3])
            )
        # Other channel counts cannot use the pretrained weights, so the
        # choice is forced off instead of being tuned.
        pretrained = False

    if pretrained is None:
        pretrained = hp.Boolean(PRETRAINED, default=False)
        if pretrained:
            # `trainable` is only meaningful when pretrained weights are
            # loaded, so it is registered conditionally.
            with hp.conditional_scope(PRETRAINED, [True]):
                trainable = hp.Boolean("trainable", default=False)
    elif pretrained:
        trainable = hp.Boolean("trainable", default=False)

    if len(self.models) > 1:
        version = hp.Choice("version", list(self.models.keys()))
    else:
        version = list(self.models.keys())[0]

    min_size = self.min_size
    if hp.Boolean("imagenet_size", default=False):
        min_size = 224
    # Upscale any spatial dimension that is smaller than the minimum.
    if input_node.shape[1] < min_size or input_node.shape[2] < min_size:
        input_node = layers.Resizing(
            max(min_size, input_node.shape[1]),
            max(min_size, input_node.shape[2]),
        )(input_node)

    # Bring the input to 3 channels: replicate a single channel, or
    # project any other channel count with a 1x1 convolution.
    if input_node.shape[3] == 1:
        input_node = layers.Concatenate()([input_node] * 3)
    if input_node.shape[3] != 3:
        input_node = layers.Conv2D(
            filters=3, kernel_size=1, padding="same"
        )(input_node)

    if pretrained:
        model = self.models[version](weights="imagenet", include_top=False)
        model.trainable = trainable
    else:
        model = self.models[version](
            weights=None,
            include_top=False,
            input_shape=input_node.shape[1:],
        )

    return model(input_node)
@keras.utils.register_keras_serializable(package="autokeras")
class EfficientNetBlock(KerasApplicationBlock):
    """Block for EfficientNet.

    # Arguments
        version: String. The value should be one of 'b0', 'b1', ..., 'b7'.
            The type of EfficientNet to use. If left unspecified, it will be
            tuned automatically.
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        version: Optional[str] = None,
        pretrained: Optional[bool] = None,
        **kwargs,
    ):
        if version is None:
            # No version requested: expose every known variant to the tuner.
            models = EFFICIENT_VERSIONS
        elif version in EFFICIENT_VERSIONS.keys():
            models = {version: EFFICIENT_VERSIONS[version]}
        else:
            raise ValueError(
                "Expect version to be in {expect}, but got "
                "{version}.".format(
                    expect=list(EFFICIENT_VERSIONS.keys()), version=version
                )
            )
        super().__init__(
            pretrained=pretrained,
            models=models,
            min_size=32,
            **kwargs,
        )
        self.version = version

    def get_config(self):
        # Without this override the `version` passed to `__init__` is lost
        # when the block is serialized and restored.
        config = super().get_config()
        config.update({"version": self.version})
        return config
def build(self, hp, inputs=None):
    """Embed the first input tensor and optionally apply dropout.

    # Arguments
        hp: HyperParameters. Used to resolve `embedding_dim` and `dropout`.
        inputs: A (possibly nested) structure of tensors; only the first
            flattened tensor is used.

    # Returns
        The embedded tensor, passed through `Dropout` when the resolved
        dropout rate is greater than 0.
    """
    token_ids = tree.flatten(inputs)[0]

    output_dim = utils.add_to_hp(self.embedding_dim, hp)
    embedded = layers.Embedding(
        input_dim=self.max_features, output_dim=output_dim
    )(token_ids)

    rate = utils.add_to_hp(self.dropout, hp)
    if rate > 0:
        return layers.Dropout(rate)(embedded)
    return embedded
def test_resnet_pretrained_error_with_two_channels():
    """A pretrained ResNetBlock must reject a 2-channel input."""
    resnet = blocks.ResNetBlock(pretrained=True)
    two_channel_input = keras.Input(shape=(224, 224, 2), dtype="float32")

    with pytest.raises(ValueError, match="When pretrained is set to True"):
        resnet.build(keras_tuner.HyperParameters(), two_channel_input)
def test_xception_deserialize_to_xception():
    """Serializing then deserializing yields an XceptionBlock again."""
    original = blocks.XceptionBlock()
    restored = blocks.deserialize(blocks.serialize(original))

    assert isinstance(restored, blocks.XceptionBlock)
def test_rnn_input_shape_one_dim_error():
    """RNNBlock.build rejects an input that is not rank 3."""
    rnn = blocks.RNNBlock()
    flat_input = keras.Input(shape=(32,), dtype="float32")

    with pytest.raises(ValueError) as excinfo:
        rnn.build(keras_tuner.HyperParameters(), flat_input)

    message = str(excinfo.value)
    assert "Expect the input tensor of RNNBlock" in message
def test_dense_build_with_bn_return_tensor():
    """DenseBlock with batch normalization builds to a single tensor."""
    dense = blocks.DenseBlock(use_batchnorm=True)
    hp = keras_tuner.HyperParameters()
    input_node = keras.Input(shape=(32,), dtype="float32")

    built = dense.build(hp, input_node)

    assert len(tree.flatten(built)) == 1
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from typing import Optional import keras import tree from keras import activations from keras import layers from keras import losses from autokeras import adapters from autokeras import analysers from autokeras import hyper_preprocessors as hpps_module from autokeras import preprocessors from autokeras.blocks import reduction from autokeras.engine import head as head_module from autokeras.utils import types from autokeras.utils import utils @keras.utils.register_keras_serializable(package="autokeras") class ClassificationHead(head_module.Head): """Classification Dense layers. Use sigmoid and binary crossentropy for binary classification and multi-label classification. Use softmax and categorical crossentropy for multi-class (more than 2) classification. Use Accuracy as metrics by default. The targets passing to the head would have to be np.ndarray. It can be raw labels, one-hot encoded if more than two classes, or binary encoded for binary classification. The raw labels will be encoded to one column if two classes were found, or one-hot encoded if more than two classes were found. # Arguments num_classes: Int. Defaults to None. If None, it will be inferred from the data. multi_label: Boolean. Defaults to False. loss: A Keras loss function. Defaults to use `binary_crossentropy` or `categorical_crossentropy` based on the number of classes. metrics: A list of Keras metrics. Defaults to use 'accuracy'. dropout: Float. The dropout rate for the layers. If left unspecified, it will be tuned automatically. 
def infer_loss(self):
    """Pick the loss implied by `num_classes` and `multi_label`.

    # Returns
        None while `num_classes` is unset (or 0); a `BinaryCrossentropy`
        instance for two classes or multi-label targets; otherwise a
        `CategoricalCrossentropy` instance.
    """
    if self.num_classes:
        use_binary = self.num_classes == 2 or self.multi_label
        if use_binary:
            return losses.BinaryCrossentropy()
        return losses.CategoricalCrossentropy()
    # The number of classes is not known yet, so no loss can be chosen.
    return None
def config_from_analyser(self, analyser):
    """Copy what the analyser learned about the targets onto the head.

    # Arguments
        analyser: The analyser whose `num_classes`, `encoded`,
            `encoded_for_sigmoid`, `encoded_for_softmax`, `shape` and
            `labels` attributes are read.
    """
    super().config_from_analyser(analyser)

    # Encoding facts consumed later by `get_hyper_preprocessors`.
    self._labels = analyser.labels
    self._encoded = analyser.encoded
    self._encoded_for_sigmoid = analyser.encoded_for_sigmoid
    self._encoded_for_softmax = analyser.encoded_for_softmax
    # Rank-1 targets need an extra trailing dimension.
    self._add_one_dimension = len(analyser.shape) == 1

    # `infer_loss` reads `num_classes`, so it must be assigned first.
    self.num_classes = analyser.num_classes
    self.loss = self.infer_loss()
@keras.utils.register_keras_serializable(package="autokeras")
class RegressionHead(head_module.Head):
    """Regression Dense layers.

    The targets passing to the head would have to be np.ndarray. It can be
    single-column or multi-column. The values should all be numerical.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use `mean_squared_error`.
        metrics: A list of Keras metrics. Defaults to use
            `mean_squared_error`.
        dropout: Float. The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        dropout: Optional[float] = None,
        **kwargs
    ):
        if metrics is None:
            metrics = ["mean_squared_error"]
        super().__init__(loss=loss, metrics=metrics, **kwargs)
        self.output_dim = output_dim
        self.dropout = dropout
        # Inferred from the analyser. Initialized here so that
        # `get_hyper_preprocessors` is safe to call before
        # `config_from_analyser` has run.
        self._add_one_dimension = False

    def get_config(self):
        config = super().get_config()
        config.update({"output_dim": self.output_dim, "dropout": self.dropout})
        return config

    def build(self, hp, inputs=None):
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node

        if self.dropout is not None:
            dropout = self.dropout
        else:
            dropout = hp.Choice("dropout", [0.0, 0.25, 0.5], default=0)

        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)
        output_node = reduction.Flatten().build(hp, output_node)
        # The final Dense layer carries the head's name.
        output_node = layers.Dense(self.shape[-1], name=self.name)(output_node)
        return output_node

    def config_from_analyser(self, analyser):
        super().config_from_analyser(analyser)
        # Rank-1 targets need an extra trailing dimension.
        self._add_one_dimension = len(analyser.shape) == 1

    def get_adapter(self):
        return adapters.RegressionAdapter(name=self.name)

    def get_analyser(self):
        return analysers.RegressionAnalyser(
            name=self.name, output_dim=self.output_dim
        )

    def get_hyper_preprocessors(self):
        hyper_preprocessors = []
        if self._add_one_dimension:
            hyper_preprocessors.append(  # pragma: no cover
                hpps_module.DefaultHyperPreprocessor(
                    preprocessors.AddOneDimension()
                )
            )
        return hyper_preprocessors
def test_multi_label_loss():
    """A multi-label head ends in sigmoid and uses binary crossentropy."""
    head = head_module.ClassificationHead(
        name="a", multi_label=True, num_classes=8, shape=(8,)
    )
    features = keras.Input(shape=(5,))

    predictions = head.build(keras_tuner.HyperParameters(), features)
    last_layer = keras.Model(features, predictions).layers[-1]

    assert last_layer.activation.__name__ == "sigmoid"
    assert head.loss.name == "binary_crossentropy"
def test_clf_head_hpps_with_uint8_contain_cast_to_int32():
    """uint8 targets make the head add a CastToInt32 preprocessor."""
    labels = test_utils.generate_one_hot_labels(100, 10).astype("uint8")
    head = head_module.ClassificationHead(shape=(8,))

    analyser = head.get_analyser()
    for row in labels:
        analyser.update(row)
    analyser.finalize()
    head.config_from_analyser(analyser)

    def _is_cast_to_int32(hpp):
        return isinstance(
            hpp, hyper_preprocessors.DefaultHyperPreprocessor
        ) and isinstance(hpp.preprocessor, preprocessors.CastToInt32)

    assert any(_is_cast_to_int32(hpp) for hpp in head.get_hyper_preprocessors())
@keras.utils.register_keras_serializable(package="autokeras")
class Normalization(block_module.Block):
    """Perform feature-wise normalization on data.

    Refer to Normalization layer in keras preprocessing layers for more
    information.

    # Arguments
        axis: Integer or tuple of integers, the axis or axes that should be
            normalized (typically the features axis). We will normalize each
            element in the specified axis. The default is '-1' (the
            innermost axis); 0 (the batch axis) is not allowed.
    """

    def __init__(self, axis: Union[int, Tuple[int, ...]] = -1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def build(self, hp, inputs=None):
        # Only the first flattened input is normalized; `hp` is unused
        # because this block has nothing to tune.
        input_node = tree.flatten(inputs)[0]
        return layers.Normalization(axis=self.axis)(input_node)

    def get_config(self):
        config = super().get_config()
        config.update({"axis": self.axis})
        return config
@keras.utils.register_keras_serializable(package="autokeras")
class ImageAugmentation(block_module.Block):
    """Collection of various image augmentation methods.

    Enabled augmentations are appended as Keras preprocessing layers in the
    order: translation, flip, rotation, contrast. The zoom factor is
    registered as a hyperparameter, but no zoom layer is added yet (see the
    TODO in `build`).

    # Arguments
        translation_factor: A positive float represented as fraction value,
            or a tuple of 2 representing fraction for translation vertically
            and horizontally, or a
            keras_tuner.engine.hyperparameters.Choice range of positive
            floats. For instance, `translation_factor=0.2` results in a
            random translation factor within 20% of the width and height.
            If left unspecified, it will be tuned automatically.
        vertical_flip: Boolean. Whether to flip the image vertically.
            If left unspecified, it will be tuned automatically.
        horizontal_flip: Boolean. Whether to flip the image horizontally.
            If left unspecified, it will be tuned automatically.
        rotation_factor: Float or
            keras_tuner.engine.hyperparameters.Choice range between [0, 1].
            A positive float represented as fraction of 2pi upper bound for
            rotating clockwise and counter-clockwise. When represented as a
            single float, lower = upper.
            If left unspecified, it will be tuned automatically.
        zoom_factor: A positive float represented as fraction value, or a
            tuple of 2 representing fraction for zooming vertically and
            horizontally, or a keras_tuner.engine.hyperparameters.Choice
            range of positive floats. For instance, `zoom_factor=0.2`
            results in a random zoom factor from 80% to 120%.
            If left unspecified, it will be tuned automatically.
        contrast_factor: A positive float represented as fraction of value,
            or a tuple of size 2 representing lower and upper bound, or a
            keras_tuner.engine.hyperparameters.Choice range of floats to
            find the optimal value. When represented as a single float,
            lower = upper. The contrast factor will be randomly picked
            between [1.0 - lower, 1.0 + upper].
            If left unspecified, it will be tuned automatically.
    """

    # Config entries that may hold a serialized hyperparameter object.
    _HP_CONFIG_KEYS = (
        "translation_factor",
        "rotation_factor",
        "zoom_factor",
        "contrast_factor",
    )

    def __init__(
        self,
        translation_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        vertical_flip: Optional[bool] = None,
        horizontal_flip: Optional[bool] = None,
        rotation_factor: Optional[Union[float, hyperparameters.Choice]] = None,
        zoom_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        contrast_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.translation_factor = utils.get_hyperparameter(
            translation_factor,
            hyperparameters.Choice("translation_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.rotation_factor = utils.get_hyperparameter(
            rotation_factor,
            hyperparameters.Choice("rotation_factor", [0.0, 0.1]),
            float,
        )
        self.zoom_factor = utils.get_hyperparameter(
            zoom_factor,
            hyperparameters.Choice("zoom_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.contrast_factor = utils.get_hyperparameter(
            contrast_factor,
            hyperparameters.Choice("contrast_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )

    @staticmethod
    def _get_fraction_value(value):
        """Return `value` as a pair; a non-tuple is duplicated."""
        if isinstance(value, tuple):
            return value
        return value, value

    @staticmethod
    def _get_flip_mode(horizontal_flip, vertical_flip):
        """Map the two flip switches to a RandomFlip mode ("" = no flip)."""
        if horizontal_flip and vertical_flip:
            return "horizontal_and_vertical"
        if horizontal_flip:
            return "horizontal"
        if vertical_flip:
            return "vertical"
        return ""

    def build(self, hp, inputs=None):
        """Chain the enabled augmentation layers onto the first input.

        # Arguments
            hp: HyperParameters. Used to resolve every value that was left
                to be tuned.
            inputs: Input node(s); only the first flattened one is used.

        # Returns
            The output node after all enabled augmentation layers.
        """
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        # Translate
        translation_factor = utils.add_to_hp(self.translation_factor, hp)
        if translation_factor not in [0, (0, 0)]:
            height_factor, width_factor = self._get_fraction_value(
                translation_factor
            )
            output_node = layers.RandomTranslation(height_factor, width_factor)(
                output_node
            )

        # Flip
        horizontal_flip = self.horizontal_flip
        if horizontal_flip is None:
            horizontal_flip = hp.Boolean("horizontal_flip", default=True)
        vertical_flip = self.vertical_flip
        if vertical_flip is None:
            vertical_flip = hp.Boolean("vertical_flip", default=True)
        flip_mode = self._get_flip_mode(horizontal_flip, vertical_flip)
        if flip_mode != "":
            output_node = layers.RandomFlip(mode=flip_mode)(output_node)

        # Rotate
        rotation_factor = utils.add_to_hp(self.rotation_factor, hp)
        if rotation_factor != 0:
            output_node = layers.RandomRotation(rotation_factor)(output_node)

        # Zoom
        # The hyperparameter is still registered so the search space stays
        # stable, but no layer is added until RandomZoom is restored.
        zoom_factor = utils.add_to_hp(self.zoom_factor, hp)
        if zoom_factor not in [0, (0, 0)]:
            height_factor, width_factor = self._get_fraction_value(zoom_factor)
            # TODO: Add back RandomZoom when it is ready.
            # output_node = layers.RandomZoom(
            #     height_factor, width_factor)(output_node)

        # Contrast
        contrast_factor = utils.add_to_hp(self.contrast_factor, hp)
        if contrast_factor not in [0, (0, 0)]:
            output_node = layers.RandomContrast(contrast_factor)(output_node)

        return output_node

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "translation_factor": io_utils.serialize_block_arg(
                    self.translation_factor
                ),
                "horizontal_flip": self.horizontal_flip,
                "vertical_flip": self.vertical_flip,
                "rotation_factor": io_utils.serialize_block_arg(
                    self.rotation_factor
                ),
                "zoom_factor": io_utils.serialize_block_arg(self.zoom_factor),
                "contrast_factor": io_utils.serialize_block_arg(
                    self.contrast_factor
                ),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the block from `get_config()` output.

        Works on a shallow copy so the caller's dict is not modified.
        """
        config = dict(config)
        for key in cls._HP_CONFIG_KEYS:
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)
def _build_on_image(block):
    """Build `block` on a fresh (32, 32, 3) float32 Keras input."""
    return block.build(
        keras_tuner.HyperParameters(),
        keras.Input(shape=(32, 32, 3), dtype="float32"),
    )


def test_augment_build_return_tensor():
    augmenter = blocks.ImageAugmentation(rotation_factor=0.2)

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_build_with_translation_factor_range_return_tensor():
    augmenter = blocks.ImageAugmentation(translation_factor=(0, 0.1))

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_build_with_no_flip_return_tensor():
    augmenter = blocks.ImageAugmentation(
        vertical_flip=False, horizontal_flip=False
    )

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_build_with_vflip_only_return_tensor():
    augmenter = blocks.ImageAugmentation(
        vertical_flip=True, horizontal_flip=False
    )

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_build_with_zoom_factor_return_tensor():
    augmenter = blocks.ImageAugmentation(zoom_factor=0.1)

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_build_with_contrast_factor_return_tensor():
    augmenter = blocks.ImageAugmentation(contrast_factor=0.1)

    built = _build_on_image(augmenter)

    assert len(tree.flatten(built)) == 1


def test_augment_deserialize_to_augment():
    original = blocks.ImageAugmentation(
        zoom_factor=0.1,
        contrast_factor=hyperparameters.Float("contrast_factor", 0.1, 0.5),
    )

    restored = blocks.deserialize(blocks.serialize(original))

    assert isinstance(restored, blocks.ImageAugmentation)
    assert restored.zoom_factor == 0.1
    assert isinstance(restored.contrast_factor, hyperparameters.Float)


def test_augment_get_config_has_all_attributes():
    config_keys = blocks.ImageAugmentation().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ImageAugmentation.__init__)

    assert init_args.issubset(config_keys)
def shape_compatible(shape1, shape2):
    """Tell whether two shapes agree on rank and on all but the last axis.

    # Arguments
        shape1: Sequence of dimensions.
        shape2: Sequence of dimensions.

    # Returns
        False when the ranks differ; otherwise the result of comparing the
        two shapes with their last dimension dropped.
    """
    same_rank = len(shape1) == len(shape2)
    # TODO: If they can be the same after passing through any layer,
    # they are compatible. e.g. (32, 32, 3), (16, 16, 2) are compatible
    return same_rank and shape1[:-1] == shape2[:-1]
@keras.utils.register_keras_serializable(package="autokeras")
class Merge(block_module.Block):
    """Merge block combining multiple nodes into a single one.

    # Arguments
        merge_type: String. 'add' or 'concatenate'. If left unspecified, it
            will be tuned automatically.
    """

    def __init__(self, merge_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.merge_type = merge_type

    def get_config(self):
        merged_config = super().get_config()
        merged_config.update(merge_type=self.merge_type)
        return merged_config

    def build(self, hp, inputs=None):
        nodes = tree.flatten(inputs)
        # A single node needs no merging; the flattened list is returned.
        if len(nodes) == 1:
            return nodes

        # Flatten every node as soon as one of them is incompatible with
        # the first one.
        any_incompatible = any(
            not shape_compatible(node.shape, nodes[0].shape) for node in nodes
        )
        if any_incompatible:
            nodes = [Flatten().build(hp, node) for node in nodes]

        # TODO: Even inputs have different shape[-1], they can still be Add(
        # ) after another layer. Check if the inputs are all of the same
        # shape
        if self._inputs_same_shape(nodes):
            chosen_type = self.merge_type or hp.Choice(
                "merge_type", ["add", "concatenate"], default="add"
            )
            if chosen_type == "add":
                return layers.Add()(nodes)

        # Fallback for differing shapes or a non-'add' merge type.
        return layers.Concatenate()(nodes)

    def _inputs_same_shape(self, inputs):
        """Return True when every node has the same shape as the first."""
        return all(node.shape == inputs[0].shape for node in inputs)


@keras.utils.register_keras_serializable(package="autokeras")
class Flatten(block_module.Block):
    """Flatten the input tensor with Keras Flatten layer."""

    def build(self, hp, inputs=None):
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]
        # Rank <= 2 is passed through unchanged.
        if len(node.shape) <= 2:
            return node
        return layers.Flatten()(node)
@keras.utils.register_keras_serializable(package="autokeras")
class Reduction(block_module.Block):
    """Base block that reduces a rank > 2 tensor.

    Subclasses provide `global_max` and `global_avg`.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.reduction_type = reduction_type

    def get_config(self):
        reduction_config = super().get_config()
        reduction_config.update({REDUCTION_TYPE: self.reduction_type})
        return reduction_config

    def global_max(self, input_node):
        raise NotImplementedError

    def global_avg(self, input_node):
        raise NotImplementedError

    def build(self, hp, inputs=None):
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]

        # No need to reduce.
        if len(node.shape) <= 2:
            return node

        # A fixed reduction type bypasses the search space entirely.
        if self.reduction_type is not None:
            return self._build_block(hp, node, self.reduction_type)

        chosen_type = hp.Choice(
            REDUCTION_TYPE, [FLATTEN, GLOBAL_MAX, GLOBAL_AVG]
        )
        with hp.conditional_scope(REDUCTION_TYPE, [chosen_type]):
            return self._build_block(hp, node, chosen_type)

    def _build_block(self, hp, output_node, reduction_type):
        """Apply the named reduction; unknown names leave the node as is."""
        if reduction_type == FLATTEN:
            return Flatten().build(hp, output_node)
        if reduction_type == GLOBAL_MAX:
            return self.global_max(output_node)
        if reduction_type == GLOBAL_AVG:
            return self.global_avg(output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class SpatialReduction(Reduction):
    """Reduce the dimension of a spatial tensor, e.g. image, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        pooling_cls = layer_utils.get_global_max_pooling(input_node.shape)
        return pooling_cls()(input_node)

    def global_avg(self, input_node):
        pooling_cls = layer_utils.get_global_average_pooling(input_node.shape)
        return pooling_cls()(input_node)


@keras.utils.register_keras_serializable(package="autokeras")
class TemporalReduction(Reduction):
    """Reduce the dim of a temporal tensor, e.g. output of RNN, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        # Reduce over the second-to-last axis.
        return ops.max(input_node, axis=-2)

    def global_avg(self, input_node):
        # Reduce over the second-to-last axis.
        return ops.mean(input_node, axis=-2)
def _float_input(*shape):
    """Create a float32 Keras input with the given (batch-less) shape."""
    return keras.Input(shape=shape, dtype="float32")


def _build(block, inputs):
    """Build `block` on `inputs` with a fresh HyperParameters container."""
    return block.build(keras_tuner.HyperParameters(), inputs)


def test_merge_build_return_tensor():
    built = _build(blocks.Merge(), [_float_input(32), _float_input(4, 8)])

    assert len(tree.flatten(built)) == 1


def test_merge_single_input_return_tensor():
    built = _build(blocks.Merge(), _float_input(32))

    assert len(tree.flatten(built)) == 1


def test_merge_inputs_with_same_shape_return_tensor():
    built = _build(blocks.Merge(), [_float_input(32), _float_input(32)])

    assert len(tree.flatten(built)) == 1


def test_merge_deserialize_to_merge():
    restored = blocks.deserialize(blocks.serialize(blocks.Merge()))

    assert isinstance(restored, blocks.Merge)


def test_merge_get_config_has_all_attributes():
    config_keys = blocks.Merge().get_config().keys()

    init_args = test_utils.get_func_args(blocks.Merge.__init__)

    assert init_args.issubset(config_keys)


def test_temporal_build_return_tensor():
    built = _build(blocks.TemporalReduction(), _float_input(32, 10))

    assert len(tree.flatten(built)) == 1


def test_temporal_global_max_return_tensor():
    reducer = blocks.TemporalReduction(reduction_type="global_max")

    built = _build(reducer, _float_input(32, 10))

    assert len(tree.flatten(built)) == 1


def test_temporal_global_avg_return_tensor():
    reducer = blocks.TemporalReduction(reduction_type="global_avg")

    built = _build(reducer, _float_input(32, 10))

    assert len(tree.flatten(built)) == 1


def test_reduction_2d_tensor_return_input_node():
    reducer = blocks.TemporalReduction()
    node = _float_input(32)

    built = _build(reducer, node)

    flat = tree.flatten(built)
    assert len(flat) == 1
    assert flat[0] is node


def test_temporal_deserialize_to_temporal():
    restored = blocks.deserialize(blocks.serialize(blocks.TemporalReduction()))

    assert isinstance(restored, blocks.TemporalReduction)


def test_temporal_get_config_has_all_attributes():
    config_keys = blocks.TemporalReduction().get_config().keys()

    init_args = test_utils.get_func_args(blocks.TemporalReduction.__init__)

    assert init_args.issubset(config_keys)


def test_spatial_build_return_tensor():
    built = _build(blocks.SpatialReduction(), _float_input(32, 32, 3))

    assert len(tree.flatten(built)) == 1


def test_spatial_deserialize_to_spatial():
    restored = blocks.deserialize(blocks.serialize(blocks.SpatialReduction()))

    assert isinstance(restored, blocks.SpatialReduction)


def test_spatial_get_config_has_all_attributes():
    config_keys = blocks.SpatialReduction().get_config().keys()

    init_args = test_utils.get_func_args(blocks.SpatialReduction.__init__)

    assert init_args.issubset(config_keys)
@keras.utils.register_keras_serializable(package="autokeras")
class ImageBlock(block_module.Block):
    """Block for image data.

    The image block chooses among ResNetBlock, XceptionBlock, ConvBlock and
    EfficientNetBlock, controlled by the hyperparameter 'block_type'.
    Optional normalization and augmentation are applied before it.

    # Arguments
        block_type: String. 'resnet', 'xception', 'vanilla' or 'efficient'.
            The type of Block to use. If unspecified, it will be tuned
            automatically.
        normalize: Boolean. Whether to channel-wise normalize the images.
            If unspecified, it will be tuned automatically.
        augment: Boolean. Whether to do image augmentation. If unspecified,
            it will be tuned automatically.
    """

    def __init__(
        self,
        block_type: Optional[str] = None,
        normalize: Optional[bool] = None,
        augment: Optional[bool] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.block_type = block_type
        self.normalize = normalize
        self.augment = augment

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                BLOCK_TYPE: self.block_type,
                NORMALIZE: self.normalize,
                AUGMENT: self.augment,
            }
        )
        return config

    def _build_block(self, hp, output_node, block_type):
        """Build the backbone selected by `block_type`.

        # Raises
            ValueError: If `block_type` is not one of the supported names.
                Previously this fell through and returned None, which only
                failed later with an unrelated error.
        """
        if block_type == RESNET:
            return basic.ResNetBlock().build(hp, output_node)
        elif block_type == XCEPTION:
            return basic.XceptionBlock().build(hp, output_node)
        elif block_type == VANILLA:
            return basic.ConvBlock().build(hp, output_node)
        elif block_type == EFFICIENT:
            return basic.EfficientNetBlock().build(hp, output_node)
        raise ValueError(
            "Unsupported block_type: {!r}. Expected one of {}.".format(
                block_type, [RESNET, XCEPTION, VANILLA, EFFICIENT]
            )
        )

    def build(self, hp, inputs=None):
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        # When left unspecified, the boolean is tuned and the sub-block's
        # hyperparameters are only active when it is True.
        if self.normalize is None and hp.Boolean(NORMALIZE):
            with hp.conditional_scope(NORMALIZE, [True]):
                output_node = preprocessing.Normalization().build(
                    hp, output_node
                )
        elif self.normalize:
            output_node = preprocessing.Normalization().build(hp, output_node)

        if self.augment is None and hp.Boolean(AUGMENT):
            with hp.conditional_scope(AUGMENT, [True]):
                output_node = preprocessing.ImageAugmentation().build(
                    hp, output_node
                )
        elif self.augment:
            output_node = preprocessing.ImageAugmentation().build(
                hp, output_node
            )

        if self.block_type is None:
            block_type = hp.Choice(
                BLOCK_TYPE, [RESNET, XCEPTION, VANILLA, EFFICIENT]
            )
            with hp.conditional_scope(BLOCK_TYPE, [block_type]):
                output_node = self._build_block(hp, output_node, block_type)
        else:
            output_node = self._build_block(hp, output_node, self.block_type)

        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class TextBlock(block_module.Block):
    """Block for text data.

    Chains an Embedding, a ConvBlock, a SpatialReduction and a DenseBlock.

    # Arguments
        max_tokens: Int. The maximum size of the vocabulary.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, max_tokens: Optional[int] = None, **kwargs):
        super().__init__(**kwargs)
        self.max_tokens = max_tokens

    def build(self, hp, inputs=None):
        input_node = tree.flatten(inputs)[0]
        output_node = input_node
        output_node = self._build_block(hp, output_node)
        return output_node

    def get_config(self):
        config = super().get_config()
        config.update({"max_tokens": self.max_tokens})
        return config

    def _build_block(self, hp, output_node):
        # Use Embedding and dense layers for tokenized text
        max_tokens = self.max_tokens or hp.Choice(
            MAX_TOKENS, [500, 5000, 20000], default=5000
        )
        # The embedding is sized one larger than the vocabulary.
        output_node = basic.Embedding(
            max_features=max_tokens + 1,
        ).build(hp, output_node)
        output_node = basic.ConvBlock().build(hp, output_node)
        output_node = reduction.SpatialReduction().build(hp, output_node)
        output_node = basic.DenseBlock().build(hp, output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class GeneralBlock(block_module.Block):
    """A general neural network block when the input type is unknown.

    The input is flattened and passed through a DenseBlock.

    # Arguments
        name: String.
    """

    def build(self, hp, inputs=None):
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node
        output_node = reduction.Flatten().build(hp, output_node)
        output_node = basic.DenseBlock().build(hp, output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class StructuredDataBlock(block_module.Block):
    """Block for structured data.

    Optionally normalizes the features, then applies a DenseBlock.

    # Arguments
        normalize: Boolean. Whether to normalize the features.
            If unspecified, it will be tuned automatically.
    """

    def __init__(self, normalize: Optional[bool] = None, **kwargs):
        super().__init__(**kwargs)
        self.normalize = normalize

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "normalize": self.normalize,
            }
        )
        return config

    def build(self, hp, inputs=None):
        input_node = tree.flatten(inputs)[0]
        output_node = input_node
        # When left unspecified, the boolean is tuned and Normalization's
        # hyperparameters are only active when it is True.
        if self.normalize is None and hp.Boolean(NORMALIZE):
            with hp.conditional_scope(NORMALIZE, [True]):
                output_node = preprocessing.Normalization().build(
                    hp, output_node
                )
        elif self.normalize:
            output_node = preprocessing.Normalization().build(hp, output_node)
        output_node = basic.DenseBlock().build(hp, output_node)
        return output_node
def _build_on_image(block):
    """Build `block` on a fresh (32, 32, 3) float32 Keras input."""
    return block.build(
        keras_tuner.HyperParameters(),
        keras.Input(shape=(32, 32, 3), dtype="float32"),
    )


def _with_numerical_columns(block):
    """Attach two numerical columns ("0" and "1") to `block`."""
    block.column_names = ["0", "1"]
    block.column_types = {"0": analysers.NUMERICAL, "1": analysers.NUMERICAL}
    return block


def test_image_build_return_tensor():
    built = _build_on_image(blocks.ImageBlock())

    assert len(tree.flatten(built)) == 1


def test_general_build_return_tensor():
    built = _build_on_image(blocks.GeneralBlock())

    assert len(tree.flatten(built)) == 1


def test_image_block_xception_return_tensor():
    built = _build_on_image(blocks.ImageBlock(block_type="xception"))

    assert len(tree.flatten(built)) == 1


def test_image_block_normalize_return_tensor():
    built = _build_on_image(blocks.ImageBlock(normalize=True))

    assert len(tree.flatten(built)) == 1


def test_image_block_augment_return_tensor():
    built = _build_on_image(blocks.ImageBlock(augment=True))

    assert len(tree.flatten(built)) == 1


def test_image_deserialize_to_image():
    restored = blocks.deserialize(blocks.serialize(blocks.ImageBlock()))

    assert isinstance(restored, blocks.ImageBlock)


def test_image_get_config_has_all_attributes():
    config_keys = blocks.ImageBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ImageBlock.__init__)

    assert init_args.issubset(config_keys)


def test_text_build_return_tensor():
    text_block = blocks.TextBlock()

    built = text_block.build(
        keras_tuner.HyperParameters(), keras.Input(shape=(1,), dtype="string")
    )

    assert len(tree.flatten(built)) == 1


def test_text_deserialize_to_text():
    restored = blocks.deserialize(blocks.serialize(blocks.TextBlock()))

    assert isinstance(restored, blocks.TextBlock)


def test_text_get_config_has_all_attributes():
    config_keys = blocks.TextBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.TextBlock.__init__)

    assert init_args.issubset(config_keys)


def test_structured_build_return_tensor():
    structured = _with_numerical_columns(blocks.StructuredDataBlock())

    built = structured.build(
        keras_tuner.HyperParameters(), keras.Input(shape=(2,), dtype="string")
    )

    assert len(tree.flatten(built)) == 1


def test_structured_block_normalize_return_tensor():
    structured = _with_numerical_columns(
        blocks.StructuredDataBlock(normalize=True)
    )

    built = structured.build(
        keras_tuner.HyperParameters(), keras.Input(shape=(2,), dtype="string")
    )

    assert len(tree.flatten(built)) == 1


def test_structured_block_search_normalize_return_tensor():
    structured = _with_numerical_columns(blocks.StructuredDataBlock(name="a"))
    hp = keras_tuner.HyperParameters()
    # Pre-seed the tuned switch so the normalization branch is taken.
    hp.values["a/" + blocks.wrapper.NORMALIZE] = True

    built = structured.build(hp, keras.Input(shape=(2,), dtype="string"))

    assert len(tree.flatten(built)) == 1


def test_structured_deserialize_to_structured():
    restored = blocks.deserialize(
        blocks.serialize(blocks.StructuredDataBlock())
    )

    assert isinstance(restored, blocks.StructuredDataBlock)


def test_structured_get_config_has_all_attributes():
    config_keys = blocks.StructuredDataBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.StructuredDataBlock.__init__)

    assert init_args.issubset(config_keys)
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import shutil import keras import pytest @pytest.fixture(autouse=True) def clear_session(): keras.backend.clear_session() yield keras.backend.clear_session() @pytest.fixture(autouse=True) def remove_tmp_path(tmp_path): yield shutil.rmtree(tmp_path) @pytest.fixture(autouse=True) def disable_traceback_filtering(): keras.config.disable_traceback_filtering() yield ================================================ FILE: autokeras/engine/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: autokeras/engine/adapter.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
class Adapter:
    """Adapt the input and output format for Keras Model.

    Adapter is used by the input nodes and the heads of the hypermodel.
    It does some type checking on the data and converts it to a compatible
    format.
    """

    def check(self, dataset):
        """Check if the dataset is valid for the input node.

        The base implementation accepts everything.

        # Arguments
            dataset: numpy.ndarray. The dataset to be checked.

        # Returns
            Boolean. Whether the dataset is in compatible format.
        """
        return True

    def adapt(self, dataset):
        """Check, convert and batch the dataset.

        The base implementation runs `check` (its return value is not
        inspected) and hands the dataset back unchanged.

        # Arguments
            dataset: Usually numpy.ndarray. The dataset to be converted.

        # Returns
            numpy.ndarray. The converted dataset.
        """
        self.check(dataset)
        return dataset
from autokeras.engine import adapter as adapter_module


def test_adapter_check_return_true():
    """The base Adapter accepts any dataset, including None."""
    base_adapter = adapter_module.Adapter()
    assert base_adapter.check(None)


# ================================================
# FILE: autokeras/engine/analyser.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class Analyser:
    """Analyze the dataset for useful information.

    Analyser is used by the input nodes and the heads of the hypermodel. It
    analyzes the dataset to get useful information, e.g., the shape of the
    data, the data type of the dataset. The information will be used by the
    input nodes and heads to construct the data pipeline and to build the
    Keras Model.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Shape is a list of integers, taken from the first array seen.
        self.shape = None
        # "string" for unicode/bytes arrays, otherwise str(array.dtype).
        self.dtype = None
        # Running total of the leading dimension over all update() calls.
        self.num_samples = 0
        # Leading dimension of the first array seen.
        self.batch_size = None

    def update(self, data):
        """Update the recorded statistics with an array of data.

        `dtype`, `shape` and `batch_size` are recorded only on the first call
        (while they are still None); `num_samples` grows by `data.shape[0]`
        on every call.

        # Arguments
            data: np.ndarray. The data to record statistics from.
        """
        if self.dtype is None:
            text_like = any(
                np.issubdtype(data.dtype, text_type)
                for text_type in (np.str_, np.bytes_)
            )
            self.dtype = "string" if text_like else str(data.dtype)

        if self.shape is None:
            self.shape = list(data.shape)

        if self.batch_size is None:
            self.batch_size = data.shape[0]

        self.num_samples = self.num_samples + data.shape[0]

    def finalize(self):
        """Process recorded information after all updates."""
        raise NotImplementedError


# ================================================
# FILE: autokeras/engine/analyser_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pytest

from autokeras.engine.analyser import Analyser


def test_analyser_update_unicode_string_dtype():
    """A unicode ("U10") array is recorded with dtype "string"."""
    analyser = Analyser()
    data = np.array(["hello", "world"], dtype="U10")
    analyser.update(data)
    assert analyser.dtype == "string"
    assert analyser.shape == [2]
    assert analyser.batch_size == 2
    assert analyser.num_samples == 2


def test_analyser_update_byte_string_dtype():
    """A bytes ("S10") array is also recorded with dtype "string"."""
    analyser = Analyser()
    data = np.array([b"hello", b"world"], dtype="S10")
    analyser.update(data)
    assert analyser.dtype == "string"
    assert analyser.shape == [2]
    assert analyser.batch_size == 2
    assert analyser.num_samples == 2


def test_analyser_update_numeric_dtype():
    """An int32 array keeps its numpy dtype name."""
    analyser = Analyser()
    data = np.array([1, 2, 3], dtype=np.int32)
    analyser.update(data)
    assert analyser.dtype == "int32"
    assert analyser.shape == [3]
    assert analyser.batch_size == 3
    assert analyser.num_samples == 3


def test_analyser_update_float_dtype():
    """A float64 array keeps its numpy dtype name."""
    analyser = Analyser()
    data = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    analyser.update(data)
    assert analyser.dtype == "float64"
    assert analyser.shape == [3]
    assert analyser.batch_size == 3
    assert analyser.num_samples == 3


def test_analyser_finalize_not_implemented():
    """The base Analyser leaves finalize() to subclasses."""
    analyser = Analyser()
    with pytest.raises(NotImplementedError):
        analyser.finalize()


# ================================================
# FILE: autokeras/engine/block.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tree

from autokeras.engine import named_hypermodel
from autokeras.engine import node as node_module


class Block(named_hypermodel.NamedHyperModel):
    """The base class for different Block.

    Blocks can be connected together to build the search space for an
    AutoModel. Notably, many args in the __init__ function default to a
    tunable variable when not specified by the user.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Populated by __call__ when the block is wired into a graph.
        self.inputs = None
        self.outputs = None
        # Number of output nodes __call__ creates.
        self._num_output_node = 1

    def _build_wrapper(self, hp, *args, **kwargs):
        # Build inside a name scope named after this block.
        with hp.name_scope(self.name):
            return super()._build_wrapper(hp, *args, **kwargs)

    def __call__(self, inputs):
        """Functional API.

        Flattens `inputs`, registers this block as an out-block of every
        input node, then creates the output nodes and registers this block
        as their in-block.

        # Arguments
            inputs: A list of input node(s) or a single input node for the
                block.

        # Returns
            list: A list of output node(s) of the Block.

        # Raises
            TypeError: If any flattened input is not a Node.
        """
        self.inputs = tree.flatten(inputs)
        for candidate in self.inputs:
            if isinstance(candidate, node_module.Node):
                candidate.add_out_block(self)
                continue
            raise TypeError(
                "Expect the inputs to block {name} to be "
                "a Node, but got {type}.".format(
                    name=self.name, type=type(candidate)
                )
            )

        self.outputs = []
        remaining = self._num_output_node
        while remaining > 0:
            new_node = node_module.Node()
            new_node.add_in_block(self)
            self.outputs.append(new_node)
            remaining -= 1
        return self.outputs

    def build(self, hp, inputs=None):
        """Build the Block into a real Keras Model.

        The subclasses should override this function and return the output
        node.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A list of input node(s).
        """
        raise NotImplementedError


# ================================================
# FILE: autokeras/engine/block_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import pytest from autokeras.engine import block as block_module def test_block_call_raise_inputs_type_error(): block = block_module.Block() with pytest.raises(TypeError) as info: block(None) assert "Expect the inputs to block" in str(info.value) ================================================ FILE: autokeras/engine/head.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from typing import Optional

import keras

from autokeras.engine import io_hypermodel
from autokeras.utils import types


def serialize_metrics(metrics):
    """Serialize a list of metrics.

    A metric given as a string is wrapped in a one-element list so that
    `deserialize_metrics` can tell it apart from a serialized Keras metric.

    # Arguments
        metrics: Iterable of metric names (str) or Keras metric objects.

    # Returns
        A list with one serialized entry per metric.
    """
    return [
        [metric] if isinstance(metric, str) else keras.metrics.serialize(metric)
        for metric in metrics
    ]


def deserialize_metrics(metrics):
    """Inverse of `serialize_metrics`.

    # Arguments
        metrics: Iterable of entries produced by `serialize_metrics`.

    # Returns
        A list of metric names (str) and/or deserialized Keras metrics.
    """
    return [
        metric[0]
        if isinstance(metric, list)
        else keras.metrics.deserialize(metric)
        for metric in metrics
    ]


def serialize_loss(loss):
    """Serialize a loss; a string loss is wrapped in a one-element list."""
    if isinstance(loss, str):
        return [loss]
    return keras.losses.serialize(loss)


def deserialize_loss(loss):
    """Inverse of `serialize_loss`."""
    if isinstance(loss, list):
        return loss[0]
    return keras.losses.deserialize(loss)


class Head(io_hypermodel.IOHyperModel):
    """Base class for the heads, e.g. classification, regression.

    # Arguments
        loss: A Keras loss function. Defaults to None. If None, the loss will
            be inferred from the AutoModel.
        metrics: A list of Keras metrics. Defaults to None. If None, the
            metrics will be inferred from the AutoModel.
    """

    def __init__(
        self,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.loss = loss
        # Always store a list so callers can iterate without a None check.
        if metrics is None:
            metrics = []
        self.metrics = metrics

    def get_config(self):
        """Return the parent config extended with serialized loss/metrics."""
        config = super().get_config()
        config.update(
            {
                "loss": serialize_loss(self.loss),
                "metrics": serialize_metrics(self.metrics),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Build a Head from a config produced by `get_config`.

        # Arguments
            config: Dict. Must contain the "loss" and "metrics" keys.
        """
        # Work on a shallow copy: the deserialized objects replace the
        # "loss"/"metrics" entries, and the caller's dict must stay as it was
        # (e.g. still JSON-serializable).
        config = dict(config)
        config["loss"] = deserialize_loss(config["loss"])
        config["metrics"] = deserialize_metrics(config["metrics"])
        return super().from_config(config)

    def build(self, hp, inputs=None):
        """Subclasses build the head's output here."""
        raise NotImplementedError


# ================================================
# FILE: autokeras/engine/head_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import head as head_module


def test_init_head_none_metrics():
    # A Head built without metrics exposes an (empty) list, not None.
    assert isinstance(head_module.Head().metrics, list)


# ================================================
# FILE: autokeras/engine/hyper_preprocessor.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import named_hypermodel


class HyperPreprocessor(named_hypermodel.NamedHyperModel):
    """Input data preprocessor search space.

    This class defines the search space for a Preprocessor.
    """

    def build(self, hp, dataset):
        """Build the input preprocessor.

        # Arguments
            hp: `HyperParameters` instance. The hyperparameters for building
                a Preprocessor.
            dataset: np.ndarray. The dataset to be preprocessed.

        # Returns
            An instance of Preprocessor.
        """
        raise NotImplementedError


# ================================================
# FILE: autokeras/engine/io_hypermodel.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import block as block_module


class IOHyperModel(block_module.Block):
    """A Block connecting the input nodes and heads with their analysers
    and adapters.

    This class is extended by the input nodes and the heads. The AutoModel
    calls the functions to get the corresponding analysers and adapters and
    passes the information back to the input nodes and heads.

    # Arguments
        shape: Defaults to None. Stored as-is and included in the config.
    """

    def __init__(self, shape=None, **kwargs):
        super().__init__(**kwargs)
        self.shape = shape
        # The four attributes below are filled in by config_from_analyser().
        self.data_shape = None
        self.dtype = None
        self.batch_size = None
        self.num_samples = None

    def get_analyser(self):
        """Get the corresponding Analyser.

        # Returns
            An instance of a subclass of autokeras.engine.Analyser.
        """
        raise NotImplementedError

    def get_adapter(self):
        """Get the corresponding Adapter.

        # Returns
            An instance of a subclass of autokeras.engine.Adapter.
        """
        raise NotImplementedError

    def config_from_analyser(self, analyser):
        """Load the learned information on dataset from the Analyser.

        Copies `shape` (into `data_shape`), `dtype`, `batch_size` and
        `num_samples` from the analyser onto this object.

        # Arguments
            analyser: An instance of a subclass of autokeras.engine.Analyser.
        """
        self.data_shape = analyser.shape
        self.dtype = analyser.dtype
        self.batch_size = analyser.batch_size
        self.num_samples = analyser.num_samples

    def get_hyper_preprocessors(self):
        """Construct a list of HyperPreprocessors based on learned information.

        # Returns
            A list of HyperPreprocessors for the corresponding data.
        """
        raise NotImplementedError

    def get_config(self):
        # Extend the parent config with the user-specified shape.
        config = super().get_config()
        config.update({"shape": self.shape})
        return config


# ================================================
# FILE: autokeras/engine/named_hypermodel.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner

from autokeras.engine import serializable
from autokeras.utils import utils


class NamedHyperModel(keras_tuner.HyperModel, serializable.Serializable):
    """A HyperModel that always has a name and can be serialized.

    # Arguments
        name: String. The name of the HyperModel. If unspecified, it will be set
            automatically with the class name.
    """

    def __init__(self, name: str = None, **kwargs):
        if not name:
            # Auto-generate "<ClassName>_<uid>" and convert it to snake case.
            # NOTE(review): the nesting of the to_snake_case() call under this
            # `if` is reconstructed from flattened text - confirm.
            prefix = self.__class__.__name__
            name = prefix + "_" + str(keras.backend.get_uid(prefix))
            name = utils.to_snake_case(name)
        super().__init__(name=name, **kwargs)

    def get_config(self):
        """Get the configuration of the hypermodel.

        # Returns
            A dictionary with the "name" and "tunable" entries.
        """
        return {"name": self.name, "tunable": self.tunable}


# ================================================
# FILE: autokeras/engine/node.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Node(object):
    """The nodes in a network connecting the blocks."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Blocks registered through add_in_block() / add_out_block().
        self.in_blocks = []
        self.out_blocks = []

    def add_in_block(self, hypermodel):
        # Record a block whose output is this node.
        self.in_blocks.append(hypermodel)

    def add_out_block(self, hypermodel):
        # Record a block that takes this node as input.
        self.out_blocks.append(hypermodel)


# ================================================
# FILE: autokeras/engine/node_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# ================================================
# FILE: autokeras/engine/preprocessor.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import serializable


class Preprocessor(serializable.Serializable):
    """A preprocessor for np.ndarray.

    A preprocessor transforms the dataset (numpy.ndarray).
    """

    def fit(self, dataset):
        """Fit the preprocessor with the dataset.

        The base implementation does nothing.

        # Arguments
            dataset: an instance of `np.ndarray`.
        """
        # TODO: may need to change to a streaming way of fit to reduce the
        # number of iterations through the dataset for speed. Need to be
        # decided when we have more use cases for this fit.
        return None

    def transform(self, dataset):
        """Transform the dataset with the preprocessor.

        # Arguments
            dataset: an instance of `np.ndarray`.

        # Returns
            The transformed dataset.
        """
        raise NotImplementedError

    def get_config(self):
        """Return the config of the preprocessor: empty for the base class."""
        return dict()


class TargetPreprocessor(Preprocessor):
    """Preprocessor for target data."""

    def postprocess(self, dataset):
        """Postprocess the output of the Keras model.

        # Arguments
            dataset: numpy.ndarray. The corresponding output of the model.

        # Returns
            numpy.ndarray. The postprocessed data.
        """
        raise NotImplementedError


# ================================================
# FILE: autokeras/engine/serializable.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Serializable(object):
    """Serializable from and to JSON with same mechanism as Keras Layer."""

    def get_config(self):
        """Returns the current config of this object.

        # Returns
            Dictionary.
        """
        raise NotImplementedError

    @classmethod
    def from_config(cls, config):
        """Build an instance from the config of this object.

        # Arguments
            config: Dict. The config of the object.
        """
        return cls(**config)


# ================================================
# FILE: autokeras/engine/tuner.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import copy
import os

import keras
import keras_tuner
import numpy as np
import tree
from keras import callbacks as callbacks_module

from autokeras import keras_layers
from autokeras import pipeline as pipeline_module
from autokeras.utils import data_utils
from autokeras.utils import utils


class AutoTuner(keras_tuner.engine.tuner.Tuner):
    """A Tuner class based on KerasTuner for AutoKeras.

    Different from KerasTuner's Tuner class, AutoTuner not only tunes the
    Hypermodel which can be directly built into a Keras model, but also the
    preprocessors. Therefore, a HyperGraph stores the overall search space
    containing both the Preprocessors and Hypermodel. For every trial, the
    HyperGraph builds the PreprocessGraph and KerasGraph with the provided
    HyperParameters.

    The AutoTuner uses EarlyStopping for acceleration during the search and
    fully trains the model with full epochs and with both training and
    validation data. The fully trained model is the best model to be used by
    AutoModel.

    # Arguments
        oracle: keras_tuner Oracle.
        hypermodel: keras_tuner HyperModel.
        **kwargs: The args supported by KerasTuner.
    """

    def __init__(self, oracle, hypermodel, **kwargs):
        # Initialize before super() for reload to work.
        self._finished = False
        super().__init__(oracle, hypermodel, **kwargs)
        # Save or load the HyperModel.
        self.hypermodel.save(os.path.join(self.project_dir, "graph"))
        self.hyper_pipeline = None

    def _populate_initial_space(self):
        # Override the function to prevent building the model during
        # initialization.
        return

    def get_best_model(self):
        """Return the first entry of `get_best_models()`."""
        return self.get_best_models()[0]

    def get_best_pipeline(self):
        """Load the pipeline stored at `best_pipeline_path`."""
        return pipeline_module.load_pipeline(self.best_pipeline_path)

    def _pipeline_path(self, trial_id):
        """Path of the pipeline saved for the given trial."""
        return os.path.join(self.get_trial_dir(trial_id), "pipeline")

    def _prepare_model_build(self, hp, **kwargs):
        """Prepare for building the Keras model.

        It builds the Pipeline from HyperPipeline, transforms the dataset to
        set the input shapes and output shapes of the HyperModel.

        # Arguments
            hp: HyperParameters used to build the pipeline.
            **kwargs: Fit arguments. "x" and "y" are required;
                "validation_data" is transformed too when the key is present.

        # Returns
            A tuple `(pipeline, (x, y), validation_data)` holding the fitted
            pipeline, the transformed training data and the transformed
            validation data (None when no "validation_data" key was given).
        """
        x = kwargs["x"]
        y = kwargs["y"]
        pipeline = self.hyper_pipeline.build(hp, (x, y))
        pipeline.fit((x, y))
        (x, y) = pipeline.transform((x, y))
        self.hypermodel.set_io_shapes(data_utils.dataset_shape((x, y)))

        if "validation_data" in kwargs:
            validation_data = pipeline.transform(kwargs["validation_data"])
        else:
            validation_data = None
        return pipeline, (x, y), validation_data

    def _build_and_fit_model(self, trial, *args, **kwargs):
        """Build, adapt and fit the model of one trial; return its history."""
        model = self._try_build(trial.hyperparameters)
        (
            pipeline,
            (
                kwargs["x"],
                kwargs["y"],
            ),
            kwargs["validation_data"],
        ) = self._prepare_model_build(trial.hyperparameters, **kwargs)
        # Keep the fitted pipeline next to the trial so it can be reloaded.
        pipeline.save(self._pipeline_path(trial.trial_id))

        keras.src.backend.compute_output_spec(model, kwargs["x"])
        self.adapt(model, kwargs["x"])

        _, history = utils.fit_with_adaptive_batch_size(model, **kwargs)
        return history

    @staticmethod
    def adapt(model, dataset):
        """Adapt the preprocessing layers in the model."""
        # Currently, only support using the original dataset to adapt all the
        # preprocessing layers before the first non-preprocessing layer.
        # TODO: Use PreprocessingStage for preprocessing layers adapt.
        # TODO: Use Keras Tuner for preprocessing layers adapt.
        x = tree.flatten(dataset)

        def get_output_layers(tensor):
            # Adaptable layers whose first input is exactly `tensor`.
            output_layers = []
            tensor = tree.flatten(tensor)[0]
            for layer in model.layers:
                if isinstance(layer, keras.layers.InputLayer):
                    continue
                input_node = tree.flatten(layer.input)[0]
                if input_node is tensor:
                    if isinstance(
                        layer,
                        keras_layers.PreprocessingLayer,
                    ) or hasattr(layer, "adapt"):
                        output_layers.append(layer)
            return output_layers

        dq = collections.deque()

        # Seed the queue with the adaptable layers attached to each input.
        for index, input_node in enumerate(tree.flatten(model.input)):
            in_x = x[index]
            for layer in get_output_layers(input_node):
                dq.append((layer, in_x))

        # Breadth-first: adapt a layer, then feed its output to the adaptable
        # layers that consume it.
        while len(dq):
            layer, in_x = dq.popleft()
            layer.adapt(in_x)
            out_x = layer(in_x)
            for next_layer in get_output_layers(layer.output):
                dq.append((next_layer, out_x))

        return model

    def search(
        self,
        epochs=None,
        callbacks=None,
        validation_split=0,
        verbose=1,
        **fit_kwargs
    ):
        """Search for the best HyperParameters.

        If there is not early-stopping in the callbacks, the early-stopping
        callback is injected to accelerate the search process. At the end of
        the search, the best model will be fully trained with the specified
        number of epochs.

        # Arguments
            epochs: Int. Defaults to None. When None, the search runs with
                1000 epochs plus early-stopping, and the final fit uses the
                best trial's number of epochs.
            callbacks: A list of callback functions. Defaults to None. The
                list passed in is not modified.
            validation_split: Float. When greater than 0, the validation data
                is concatenated to the training data for the final fit.
            verbose: Forwarded to the search and to the final fit.
            **fit_kwargs: Remaining fit arguments ("x", "y",
                "validation_data", ...).

        # Returns
            The history of the final fit, or None when no final fit was run
            (or the search had already finished).
        """
        if self._finished:
            return

        # Copy so that the early-stopping callback appended below never leaks
        # into the list owned by the caller.
        callbacks = [] if callbacks is None else list(callbacks)

        self.hypermodel.set_fit_args(validation_split, epochs=epochs)

        # Insert early-stopping for adaptive number of epochs.
        epochs_provided = True
        if epochs is None:
            epochs_provided = False
            epochs = 1000
            if not utils.contain_instance(
                callbacks, callbacks_module.EarlyStopping
            ):
                callbacks.append(
                    callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
                )

        # Insert early-stopping for acceleration.
        early_stopping_inserted = False
        new_callbacks = self._deepcopy_callbacks(callbacks)
        if not utils.contain_instance(
            callbacks, callbacks_module.EarlyStopping
        ):
            early_stopping_inserted = True
            new_callbacks.append(
                callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
            )

        # Populate initial search space.
        hp = self.oracle.get_space()
        self._prepare_model_build(hp, **fit_kwargs)
        self._try_build(hp)
        self.oracle.update_space(hp)
        super().search(
            epochs=epochs,
            callbacks=new_callbacks,
            verbose=verbose,
            **fit_kwargs
        )

        # Train the best model use validation data.
        # Train the best model with enough number of epochs.
        if validation_split > 0 or early_stopping_inserted:
            copied_fit_kwargs = copy.copy(fit_kwargs)

            # Remove early-stopping since no validation data.
            # Remove early-stopping since it is inserted.
            copied_fit_kwargs["callbacks"] = self._remove_early_stopping(
                callbacks
            )

            # Decide the number of epochs.
            copied_fit_kwargs["epochs"] = epochs
            if not epochs_provided:
                copied_fit_kwargs["epochs"] = self._get_best_trial_epochs()

            # Concatenate training and validation data.
            if validation_split > 0:
                x, y = copied_fit_kwargs["x"], copied_fit_kwargs["y"]
                x_val, y_val = fit_kwargs["validation_data"]
                copied_fit_kwargs["x"] = tree.map_structure(
                    lambda train, val: np.concatenate([train, val], axis=0),
                    x,
                    x_val,
                )
                copied_fit_kwargs["y"] = tree.map_structure(
                    lambda train, val: np.concatenate([train, val], axis=0),
                    y,
                    y_val,
                )
                copied_fit_kwargs.pop("validation_data")

            self.hypermodel.set_fit_args(0, epochs=copied_fit_kwargs["epochs"])
            copied_fit_kwargs["verbose"] = verbose
            pipeline, model, history = self.final_fit(**copied_fit_kwargs)
        else:
            # TODO: Add return history functionality in Keras Tuner
            model = self.get_best_model()
            history = None
            pipeline = pipeline_module.load_pipeline(
                self._pipeline_path(self.oracle.get_best_trials(1)[0].trial_id)
            )

        model.save(self.best_model_path)
        pipeline.save(self.best_pipeline_path)
        self._finished = True
        return history

    def get_state(self):
        """Return the parent state extended with the "finished" flag."""
        state = super().get_state()
        state.update({"finished": self._finished})
        return state

    def set_state(self, state):
        """Restore the parent state and the "finished" flag."""
        super().set_state(state)
        self._finished = state.get("finished")

    @staticmethod
    def _remove_early_stopping(callbacks):
        """Return deep copies of all callbacks except EarlyStopping ones.

        # Arguments
            callbacks: A list of Keras callbacks.

        # Returns
            A new flat list holding one deep copy of every callback that is
            not an instance of EarlyStopping.
        """
        # Copy each kept callback individually; copying the whole `callbacks`
        # list here would nest lists and bring EarlyStopping back in.
        return [
            copy.deepcopy(callback)
            for callback in callbacks
            if not isinstance(callback, callbacks_module.EarlyStopping)
        ]

    def _get_best_trial_epochs(self):
        """Number of epochs at which the best trial reached its best step."""
        best_trial = self.oracle.get_best_trials(1)[0]
        # steps counts from 0, so epochs = step + 1.
        return self.oracle.get_trial(best_trial.trial_id).best_step + 1

    def _build_best_model(self):
        """Build a model from the best trial's hyperparameters."""
        best_trial = self.oracle.get_best_trials(1)[0]
        best_hp = best_trial.hyperparameters
        return self._try_build(best_hp)

    def final_fit(self, **kwargs):
        """Rebuild the best model and fit it with the given fit arguments.

        # Arguments
            **kwargs: Fit arguments; "x" and "y" are required.

        # Returns
            A tuple `(pipeline, model, history)`.
        """
        best_trial = self.oracle.get_best_trials(1)[0]
        best_hp = best_trial.hyperparameters
        (
            pipeline,
            (kwargs["x"], kwargs["y"]),
            kwargs["validation_data"],
        ) = self._prepare_model_build(best_hp, **kwargs)

        model = self._build_best_model()
        self.adapt(model, kwargs["x"])
        model, history = utils.fit_with_adaptive_batch_size(model, **kwargs)
        return pipeline, model, history

    @property
    def best_model_path(self):
        return os.path.join(self.project_dir, "best_model.keras")

    @property
    def best_pipeline_path(self):
        return os.path.join(self.project_dir, "best_pipeline")

    @property
    def objective(self):
        return self.oracle.objective

    @property
    def max_trials(self):
        return self.oracle.max_trials


# ================================================
# FILE: autokeras/engine/tuner_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def called_with_early_stopping(func):
    """Check the first recorded call of `func` for an EarlyStopping callback.

    # Arguments
        func: A mock that has been called at least once with a `callbacks`
            keyword argument.

    # Returns
        Boolean. Whether any entry of that `callbacks` argument is an
        instance of `keras.callbacks.EarlyStopping`.
    """
    # Index 1 of a recorded call holds its keyword arguments.
    first_call_kwargs = func.call_args_list[0][1]
    for candidate in first_call_kwargs["callbacks"]:
        if isinstance(candidate, keras.callbacks.EarlyStopping):
            return True
    return False
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner._get_best_trial_epochs", return_value=2
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_final_fit_best_epochs_if_epoch_unspecified(
    _, best_epochs, final_fit, super_search, tmp_path
):
    """With `epochs=None`, final_fit receives the best trial's epoch count."""
    searcher = greedy.Greedy(
        hypermodel=test_utils.build_graph(), directory=tmp_path
    )
    # The mocked final_fit must return a 3-tuple.
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    train_x = np.random.rand(100, 32, 32, 3)
    train_y = np.random.rand(100, 1)
    val_x = np.random.rand(20, 32, 32, 3)
    val_y = np.random.rand(20, 1)

    searcher.search(
        x=train_x,
        y=train_y,
        epochs=None,
        validation_split=0.2,
        validation_data=(val_x, val_y),
    )

    # `_get_best_trial_epochs` is patched to return 2.
    final_fit_kwargs = final_fit.call_args_list[0][1]
    assert final_fit_kwargs["epochs"] == 2
@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_tuner_not_call_super_search_with_overwrite(
    _, final_fit, super_search, tmp_path
):
    """A second tuner on the same directory must not run the base search.

    The first tuner searches and is saved; a fresh tuner created on the same
    directory is then asked to search again, and the patched base-class
    `search` must stay uncalled.
    """
    search_kwargs = {"x": None, "epochs": 10, "validation_data": None}

    def new_tuner():
        return greedy.Greedy(
            hypermodel=test_utils.build_graph(), directory=tmp_path
        )

    first_tuner = new_tuner()
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())
    first_tuner.search(**search_kwargs)
    first_tuner.save()
    # Forget the call made by the first search.
    super_search.reset_mock()

    second_tuner = new_tuner()
    second_tuner.search(**search_kwargs)

    super_search.assert_not_called()
class Graph(keras_tuner.HyperModel, serializable.Serializable):
    """A HyperModel made of connected Blocks and Heads.

    # Arguments
        inputs: A list of input node(s) for the Graph.
        outputs: A list of output node(s) for the Graph.
    """

    def __init__(self, inputs=None, outputs=None, **kwargs):
        super().__init__(**kwargs)
        # Bookkeeping filled in by _build_network().
        self._nodes = []
        self._node_to_id = {}
        self.blocks = []
        self._block_to_id = {}
        # Temporary attributes, assigned by set_fit_args().
        self.epochs = None
        self.num_samples = None

        self.inputs = tree.flatten(inputs)
        self.outputs = tree.flatten(outputs)
        if inputs and outputs:
            self._build_network()

    def _build_network(self):
        """Collect the nodes and blocks joining inputs to outputs.

        Fills `_node_to_id`, `_nodes`, `blocks` and `_block_to_id`; blocks
        are stored in topological order.

        # Raises
            ValueError: If an input or output node was not reached by the
                search, if a selected block needs an input node that is not
                part of the graph, or (from `_search_network`) if the
                network has a cycle.
        """
        self._node_to_id = {}

        # Recursively find all the nodes of interest.
        for start_node in self.inputs:
            self._search_network(start_node, self.outputs, set(), set())
        self._nodes = sorted(self._node_to_id, key=self._node_to_id.get)

        for endpoint in self.inputs + self.outputs:
            if endpoint not in self._node_to_id:
                raise ValueError("Inputs and outputs not connected.")

        # Find the blocks: keep each block once, and only if at least one of
        # its outputs is a registered node.
        pending = []
        for node in self._nodes:
            for candidate in node.out_blocks:
                if candidate in pending:
                    continue
                if any(
                    produced in self._node_to_id
                    for produced in candidate.outputs
                ):
                    pending.append(candidate)

        # Check that every input of every kept block is a registered node.
        for block in pending:
            for required in block.inputs:
                if required not in self._node_to_id:
                    raise ValueError(
                        "A required input is missing for HyperModel "
                        "{name}.".format(name=block.name)
                    )

        # In degree of a node: number of kept blocks that produce it.
        in_degree = [
            sum(1 for producer in node.in_blocks if producer in pending)
            for node in self._nodes
        ]

        # Add the blocks in topological order.
        self.blocks = []
        self._block_to_id = {}
        while pending:
            # A block is ready once all of its input nodes have in degree 0.
            ready = [
                block
                for block in pending
                if not any(
                    in_degree[self._node_to_id[node]] for node in block.inputs
                )
            ]

            for block in ready:
                pending.remove(block)

            for block in ready:
                self._add_block(block)
                # The outputs of this block now have one producer fewer.
                for produced in block.outputs:
                    in_degree[self._node_to_id[produced]] -= 1

    def _search_network(
        self, input_node, outputs, in_stack_nodes, visited_nodes
    ):
        """Depth-first search that registers nodes leading to `outputs`.

        A node is registered through `_add_node` when it is itself in
        `outputs` or when one of its successors is already registered. Ids
        are therefore handed out while the recursion unwinds.

        # Arguments
            input_node: The node to start from.
            outputs: The output nodes of the graph.
            in_stack_nodes: Set of nodes on the current recursion path.
            visited_nodes: Set of nodes already visited.

        # Raises
            ValueError: If a successor is already on the recursion path.
        """
        visited_nodes.add(input_node)
        in_stack_nodes.add(input_node)

        outputs_reached = input_node in outputs

        for block in input_node.out_blocks:
            for successor in block.outputs:
                if successor in in_stack_nodes:
                    raise ValueError("The network has a cycle.")
                if successor not in visited_nodes:
                    self._search_network(
                        successor, outputs, in_stack_nodes, visited_nodes
                    )
                if successor in self._node_to_id:
                    outputs_reached = True

        if outputs_reached:
            self._add_node(input_node)

        in_stack_nodes.remove(input_node)

    def _add_block(self, block):
        """Append `block` with the next free id unless it is already stored."""
        if block in self.blocks:
            return
        self._block_to_id[block] = len(self.blocks)
        self.blocks.append(block)

    def _add_node(self, input_node):
        """Give `input_node` the next free id unless it already has one."""
        if input_node in self._node_to_id:
            return
        self._node_to_id[input_node] = len(self._node_to_id)

    def get_config(self):
        """Return a dict describing the graph: blocks, input nodes, wiring."""

        def ids_of(nodes):
            return [self._node_to_id[node] for node in nodes]

        serialized_blocks = [
            blocks_module.serialize(block) for block in self.blocks
        ]
        # Only the input nodes are serialized.
        serialized_inputs = {
            str(self._node_to_id[node]): nodes_module.serialize(node)
            for node in self.inputs
        }
        wiring_in = {
            str(index): ids_of(block.inputs)
            for index, block in enumerate(self.blocks)
        }
        wiring_out = {
            str(index): ids_of(block.outputs)
            for index, block in enumerate(self.blocks)
        }
        output_ids = ids_of(self.outputs)

        return {
            "blocks": serialized_blocks,  # Dict {id: serialized}.
            "nodes": serialized_inputs,  # Dict {id: serialized}.
            "outputs": output_ids,  # List of node_ids.
            "block_inputs": wiring_in,  # Dict {id: List of node_ids}.
            "block_outputs": wiring_out,  # Dict {id: List of node_ids}.
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a Graph from the dict produced by `get_config`."""
        restored_blocks = [
            blocks_module.deserialize(block) for block in config["blocks"]
        ]
        node_by_id = {
            int(node_id): nodes_module.deserialize(node)
            for node_id, node in config["nodes"].items()
        }
        # Snapshot now: at this point the mapping holds only the nodes stored
        # under config["nodes"]; block outputs are added to it below.
        graph_inputs = list(node_by_id.values())

        for index, block in enumerate(restored_blocks):
            key = str(index)
            fed_nodes = [
                node_by_id[node_id] for node_id in config["block_inputs"][key]
            ]
            # Calling the block on its inputs yields its output node(s).
            produced = tree.flatten(block(fed_nodes))
            node_by_id.update(zip(config["block_outputs"][key], produced))

        graph_outputs = [node_by_id[node_id] for node_id in config["outputs"]]
        return cls(inputs=graph_inputs, outputs=graph_outputs)

    def compile(self):
        """Share the information between blocks."""
        for block in self.blocks:
            hooks = COMPILE_FUNCTIONS.get(block.__class__, [])
            for hook in hooks:
                hook(block)

    def build(self, hp):
        """Build the HyperModel into a Keras Model."""
        self.compile()
        built = {}  # node id -> what was built for that node
        model_inputs = []
        for node in self.inputs:
            node_id = self._node_to_id[node]
            placeholder = node.build_node(hp)
            built[node_id] = node.build(hp, placeholder)
            model_inputs.append(placeholder)

        # self.blocks is in topological order, so inputs are built first.
        for block in self.blocks:
            block_inputs = [
                built[self._node_to_id[node]] for node in block.inputs
            ]
            produced = tree.flatten(block.build(hp, inputs=block_inputs))
            for node, tensor in zip(block.outputs, produced):
                built[self._node_to_id[node]] = tensor

        model_outputs = [
            built[self._node_to_id[node]] for node in self.outputs
        ]
        model = keras.Model(model_inputs, model_outputs)

        return self._compile_keras_model(hp, model)

    def _get_metrics(self):
        """Map head name to its metrics, for outputs produced by a Head."""
        producers = (node.in_blocks[0] for node in self.outputs)
        return {
            head.name: head.metrics
            for head in producers
            if isinstance(head, head_module.Head)
        }

    def _get_loss(self):
        """Map head name to its loss, for outputs produced by a Head."""
        producers = (node.in_blocks[0] for node in self.outputs)
        return {
            head.name: head.loss
            for head in producers
            if isinstance(head, head_module.Head)
        }

    def _compile_keras_model(self, hp, model):
        """Compile `model` with an optimizer and learning rate chosen by `hp`.

        The metrics and losses come from `_get_metrics` and `_get_loss`.
        Returns the same `model` object after compiling it.
        """
        # Specify hyperparameters from compile(...)
        optimizer_name = hp.Choice(
            "optimizer",
            ["adam", "sgd", "adam_weight_decay"],
            default="adam",
        )
        # TODO: add adadelta optimizer when it can optimize embedding layer on
        # GPU.
        learning_rate = hp.Choice(
            "learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 2e-5, 1e-5], default=1e-3
        )

        if optimizer_name == "adam":
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == "sgd":
            optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_name == "adam_weight_decay":
            # Fall back to 32 when no batch size is set; always at least one
            # step per epoch.
            effective_batch_size = self.batch_size or 32
            steps_per_epoch = max(
                1, int(self.num_samples / effective_batch_size)
            )
            total_steps = steps_per_epoch * self.epochs

            # Decay the learning rate to 0.0 over the total number of steps.
            lr_schedule = keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=total_steps,
                end_learning_rate=0.0,
            )
            optimizer = keras.optimizers.AdamW(
                learning_rate=lr_schedule,
                weight_decay=0.01,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-6,
            )

        model.compile(
            optimizer=optimizer,
            metrics=self._get_metrics(),
            loss=self._get_loss(),
        )

        return model

    def save(self, filepath):
        """Write the config of the graph to `filepath` as JSON."""
        config = self.get_config()
        io_utils.save_json(filepath, config)

    def set_io_shapes(self, shapes):
        """Record shapes on the input nodes and on the output blocks.

        `shapes[0]` is matched against the inputs and `shapes[1]` against the
        outputs; the first entry of each shape is dropped before storing it.
        """
        for input_node, full_shape in zip(
            self.inputs, tree.flatten(shapes[0])
        ):
            input_node.shape = tuple(full_shape[1:])
        for output_node, full_shape in zip(
            self.outputs, tree.flatten(shapes[1])
        ):
            output_node.in_blocks[0].shape = tuple(full_shape[1:])

    def set_fit_args(self, validation_split, epochs=None):
        """Store the epochs and the number of training samples.

        # Arguments
            validation_split: Fraction held out for validation; a falsy
                value (such as None) is treated as 0.
            epochs: Number of epochs; 1 is stored when it is None.
        """
        # Epochs not specified by the user
        self.epochs = 1 if epochs is None else epochs
        held_out = validation_split or 0
        # num_samples from analysers are before split
        self.num_samples = self.inputs[0].num_samples * (1 - held_out)

    @property
    def batch_size(self):
        """The batch size stored on the first input node."""
        first_input = self.inputs[0]
        return first_input.batch_size
def test_input_output_disconnect():
    """Graph rejects an output that does not descend from the given input."""
    # An input whose only block leads nowhere near the requested output.
    stray_input = ak.Input()
    ak.DenseBlock()(stray_input)

    # A separate, fully wired chain ending in a regression head.
    linked_input = ak.Input()
    hidden = ak.DenseBlock()(linked_input)
    head_output = ak.RegressionHead()(hidden)

    with pytest.raises(ValueError) as info:
        graph_module.Graph(inputs=stray_input, outputs=head_output)
    message = str(info.value)
    assert "Inputs and outputs not connected." in message
def test_graph_basics():
    """An Input -> DenseBlock -> RegressionHead graph builds a model whose
    input and output shapes match the declared ones."""
    source = ak.Input(shape=(30,))
    hidden = ak.DenseBlock()(source)
    target = ak.RegressionHead(shape=(1,))(hidden)

    graph = graph_module.Graph(inputs=source, outputs=target)
    hp = keras_tuner.HyperParameters()
    model = graph.build(hp)

    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, 1)
def test_merge():
    """Two dense branches joined by Merge build a two-input, one-output model."""
    left_input = ak.Input(shape=(30,))
    right_input = ak.Input(shape=(40,))
    left_branch = ak.DenseBlock()(left_input)
    right_branch = ak.DenseBlock()(right_input)
    merged = ak.Merge()([left_branch, right_branch])
    target = ak.RegressionHead(shape=(1,))(merged)

    graph = graph_module.Graph(
        inputs=[left_input, right_input], outputs=target
    )
    model = graph.build(keras_tuner.HyperParameters())

    assert model.input_shape == [(None, 30), (None, 40)]
    assert model.output_shape == (None, 1)
@keras.utils.register_keras_serializable(package="autokeras")
class DefaultHyperPreprocessor(hyper_preprocessor.HyperPreprocessor):
    """HyperPreprocessor without Hyperparameters to tune.

    It would always return the same preprocessor. No hyperparameters to be
    tuned.

    # Arguments
        preprocessor: The Preprocessor to return when calling build.
    """

    def __init__(self, preprocessor, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.preprocessor = preprocessor

    def build(self, hp, dataset):
        """Return the wrapped preprocessor; `hp` and `dataset` are not used."""
        return self.preprocessor

    def get_config(self):
        """Return the parent config plus the serialized preprocessor."""
        config = super().get_config()
        config.update(
            {"preprocessor": preprocessors.serialize(self.preprocessor)}
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create an instance from a config produced by `get_config`.

        The caller's dict is not modified.
        """
        # Work on a shallow copy: overwriting "preprocessor" in the caller's
        # dict would make a second from_config() call on the same config
        # receive an already-deserialized preprocessor.
        config = dict(config)
        config["preprocessor"] = preprocessors.deserialize(
            config["preprocessor"]
        )
        return super().from_config(config)
def test_serialize_and_deserialize_default_hpps():
    """The wrapped preprocessor keeps its type across serialize/deserialize."""
    wrapped = preprocessors.AddOneDimension()
    before = hyper_preprocessors.DefaultHyperPreprocessor(wrapped)

    serialized = hyper_preprocessors.serialize(before)
    after = hyper_preprocessors.deserialize(serialized)

    assert isinstance(after.preprocessor, preprocessors.AddOneDimension)
def test_text_and_structured_data(tmp_path):
    """Fit a two-input (text + structured data), two-head AutoModel."""
    # Prepare the data.
    sample_count = 3
    text_x = test_utils.generate_text_data(sample_count)
    structured_x = pd.read_csv(test_utils.TRAIN_CSV_PATH).values[:sample_count]
    classification_y = test_utils.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3
    )
    regression_y = test_utils.generate_data(
        num_instances=sample_count, shape=(1,)
    )

    # Build the model: one dense branch and one text branch, merged and fed
    # to a regression head and a classification head.
    structured_input = ak.StructuredDataInput()
    structured_branch = ak.DenseBlock()(structured_input)
    text_input = ak.TextInput()
    text_branch = ak.TextBlock()(text_input)
    merged = ak.Merge()((structured_branch, text_branch))

    regression_head = ak.RegressionHead()(merged)
    classification_head = ak.ClassificationHead()(merged)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_input],
        directory=tmp_path,
        outputs=[regression_head, classification_head],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=test_utils.SEED,
    )

    # Train; data order follows the inputs/outputs order given above.
    features = (text_x, structured_x)
    targets = (regression_y, classification_y)
    automodel.fit(
        features,
        targets,
        validation_split=0.2,
        epochs=1,
        batch_size=2,
    )
def test_io_api(tmp_path):
    """Fit and predict with an AutoModel given only input and head nodes."""
    sample_count = 3
    image_x = test_utils.generate_data(
        num_instances=sample_count, shape=(28, 28)
    )
    text_x = test_utils.generate_text_data(num_instances=sample_count)

    image_x = image_x[:sample_count]
    table = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)
    structured_data_x = table[:sample_count]
    classification_y = test_utils.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3
    )
    regression_y = test_utils.generate_data(
        num_instances=sample_count, shape=(1,)
    )

    # Build model and train.
    input_nodes = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    head_nodes = [
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ]
    automodel = ak.AutoModel(
        inputs=input_nodes,
        outputs=head_nodes,
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=test_utils.SEED,
    )

    features = [image_x, text_x, structured_data_x]
    automodel.fit(
        features,
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=2,
    )
    automodel.predict([image_x, text_x, structured_data_x])
def test_image_classifier(tmp_path):
    """ImageClassifier fits, exports, evaluates and predicts 10 classes."""
    images = test_utils.generate_data(
        num_instances=NUM_INSTANCES, shape=(32, 32)
    )
    labels = test_utils.generate_one_hot_labels(
        num_instances=NUM_INSTANCES, num_classes=10
    )
    classifier = ak.ImageClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
    )

    classifier.fit(
        images,
        labels,
        epochs=1,
        validation_split=0.2,
        batch_size=BATCH_SIZE,
    )
    exported = classifier.export_model()
    classifier.evaluate(images, labels)
    predictions = classifier.predict(images)

    assert predictions.shape == (len(images), 10)
    assert isinstance(exported, keras.Model)
clf.predict(test_x).shape == (len(test_x), 1) assert clf.tuner._get_best_trial_epochs() <= 2 def test_text_regressor(tmp_path): train_x = test_utils.generate_text_data(num_instances=NUM_INSTANCES) test_x = train_x train_y = test_utils.generate_data(num_instances=NUM_INSTANCES, shape=(1,)) test_y = train_y clf = ak.TextRegressor( directory=tmp_path, max_trials=2, seed=test_utils.SEED ) clf.fit( train_x, train_y, epochs=1, validation_data=(test_x, test_y), batch_size=BATCH_SIZE, ) clf.predict(test_x) clf.export_model() assert clf.predict(test_x).shape == (len(test_x), 1) def test_structured_data_regressor(tmp_path): num_data = NUM_INSTANCES * 2 num_train = NUM_INSTANCES data = ( pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_data] ) x_train, x_test = data[:num_train], data[num_train:] y = test_utils.generate_data(num_instances=num_data, shape=tuple()) y_train, y_test = y[:num_train], y[num_train:] clf = ak.StructuredDataRegressor( directory=tmp_path, max_trials=2, seed=test_utils.SEED ) clf.fit( x_train, y_train, epochs=11, validation_data=(x_train, y_train), batch_size=BATCH_SIZE, ) clf.export_model() assert clf.predict(x_test).shape == (len(y_test), 1) def test_structured_data_classifier(tmp_path): num_data = NUM_INSTANCES * 2 num_train = NUM_INSTANCES data = ( pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_data] ) x_train, x_test = data[:num_train], data[num_train:] y = test_utils.generate_one_hot_labels( num_instances=num_data, num_classes=3 ) y_train, y_test = y[:num_train], y[num_train:] clf = ak.StructuredDataClassifier( directory=tmp_path, max_trials=1, seed=test_utils.SEED ) clf.fit( x_train, y_train, epochs=2, validation_data=(x_train, y_train), batch_size=BATCH_SIZE, ) clf.export_model() assert clf.predict(x_test).shape == (len(y_test), 3) ================================================ FILE: autokeras/keras_layers.py ================================================ # Copyright 2020 The AutoKeras Authors. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
from keras import layers
from keras import ops

from autokeras.utils import data_utils

INT = "int"
NONE = "none"
ONE_HOT = "one-hot"


class PreprocessingLayer(layers.Layer):
    """Common base class for the preprocessing layers defined below."""


@keras.utils.register_keras_serializable()
class CastToFloat32(PreprocessingLayer):
    """Layer that passes its inputs through `data_utils.cast_to_float32`."""

    def get_config(self):
        """Return the parent layer config; this layer adds no fields."""
        config = super().get_config()
        return config

    def call(self, inputs):
        """Cast `inputs` with `data_utils.cast_to_float32`."""
        # Does not and needs not handle strings.
        casted = data_utils.cast_to_float32(inputs)
        return casted

    def adapt(self, data):
        """No-op: this layer has no state to fit."""
        return None


@keras.utils.register_keras_serializable()
class ExpandLastDim(PreprocessingLayer):
    """Layer that appends a trailing axis of size one to its inputs."""

    def get_config(self):
        """Return the parent layer config; this layer adds no fields."""
        config = super().get_config()
        return config

    def call(self, inputs):
        """Return `inputs` with one extra axis at the last position."""
        expanded = ops.expand_dims(inputs, axis=-1)
        return expanded

    def adapt(self, data):
        """No-op: this layer has no state to fit."""
        return None


# ================================================
# FILE: autokeras/keras_layers_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np

from autokeras import keras_layers as layer_module


def get_text_data():
    """Return (train, test, y): two string column arrays and random targets."""
    train_rows = [
        ["This is a test example"],
        ["This is another text example"],
        ["Is this another example?"],
        [""],
        ["Is this a long long long long long long example?"],
    ]
    test_rows = [
        ["This is a test example"],
        ["This is another text example"],
        ["Is this another example?"],
    ]
    train = np.array(train_rows, dtype=str)
    test = np.array(test_rows, dtype=str)
    y = np.random.rand(3, 1)
    return train, test, y


def test_cast_to_float32_return_float32_tensor(tmp_path):
    """CastToFloat32 turns a uint8 array into a float32 tensor."""
    cast_layer = layer_module.CastToFloat32()
    uint8_input = np.array([3], dtype="uint8")

    output = cast_layer(uint8_input)

    assert "float32" == output.numpy().dtype


def test_expand_last_dim_return_tensor_with_more_dims(tmp_path):
    """ExpandLastDim turns a rank-1 input into a rank-2 tensor."""
    expand_layer = layer_module.ExpandLastDim()
    vector = np.array([0.1, 0.2], dtype="float32")

    output = expand_layer(vector)

    assert 2 == len(output.shape)


# ================================================
# FILE: autokeras/nodes.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict
from typing import List
from typing import Optional

import keras
import tree

from autokeras import adapters
from autokeras import analysers
from autokeras import blocks
from autokeras import hyper_preprocessors
from autokeras import keras_layers
from autokeras import preprocessors
from autokeras.engine import io_hypermodel
from autokeras.engine import node as node_module
from autokeras.utils import utils


def serialize(obj):
    """Serialize a node with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild a node from `config`, resolving class names in this module."""
    lookup = {
        "module_objects": globals(),
        "custom_objects": custom_objects,
    }
    return utils.deserialize_keras_object(config, **lookup)


@keras.utils.register_keras_serializable(package="autokeras")
class Input(node_module.Node, io_hypermodel.IOHyperModel):
    """Input node for tensor data.

    The data should be numpy.ndarray.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create the `keras.Input` using this node's shape and dtype."""
        placeholder = keras.Input(shape=self.shape, dtype=self.dtype)
        return placeholder

    def build(self, hp, inputs=None):
        """Cast the first flattened input to float32."""
        first_input = tree.flatten(inputs)[0]
        cast_layer = keras_layers.CastToFloat32()
        return cast_layer(first_input)

    def get_adapter(self):
        """Return the adapter used for this kind of input."""
        return adapters.InputAdapter()

    def get_analyser(self):
        """Return the analyser used for this kind of input."""
        return analysers.InputAnalyser()

    def get_block(self):
        """Return the default block to attach to this input."""
        return blocks.GeneralBlock()

    def get_hyper_preprocessors(self):
        """Return the hyper-preprocessors for this input (none by default)."""
        return list()


class ImageInput(Input):
    """Input node for image data.

    The input data should be numpy.ndarray. The shape of the data should be
    (samples, width, height) or (samples, width, height, channels).

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build(self, hp, inputs=None):
        """Cast to float32 and add a channel axis to rank-3 tensors."""
        casted = super().build(hp, inputs)
        node = tree.flatten(casted)[0]
        if len(node.shape) != 3:
            return node
        # A rank-3 tensor has no channel axis yet; append one.
        return keras_layers.ExpandLastDim()(node)

    def get_adapter(self):
        """Return the adapter used for image input."""
        return adapters.ImageAdapter()

    def get_analyser(self):
        """Return the analyser used for image input."""
        return analysers.ImageAnalyser()

    def get_block(self):
        """Return the default block to attach to image input."""
        return blocks.ImageBlock()


class TextInput(Input):
    """Input node for text data.

    The input data should be numpy.ndarray. The data should be
    one-dimensional. Each element in the data should be a string which is a
    full sentence.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create the `keras.Input` with this node's shape and dtype int32."""
        placeholder = keras.Input(shape=self.shape, dtype="int32")
        return placeholder

    def build(self, hp, inputs=None):
        """Return the first flattened input, expanding rank-1 tensors."""
        node = tree.flatten(inputs)[0]
        if len(node.shape) != 1:
            return node
        return keras_layers.ExpandLastDim()(node)  # pragma: no cover

    def get_adapter(self):
        """Return the adapter used for text input."""
        return adapters.TextAdapter()

    def get_analyser(self):
        """Return the analyser used for text input."""
        return analysers.TextAnalyser()

    def get_block(self):
        """Return the default block to attach to text input."""
        return blocks.TextBlock()

    def get_hyper_preprocessors(self):
        """Return the string cast followed by the text tokenizer."""
        steps = (preprocessors.CastToString(), preprocessors.TextTokenizer())
        return [
            hyper_preprocessors.DefaultHyperPreprocessor(step)
            for step in steps
        ]


class StructuredDataInput(Input):
    """Input node for structured data.

    The input data should be numpy.ndarray. The data should be
    two-dimensional with numerical or categorical values.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data. Defaults to None.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. Defaults to None. If not None, the column_names need
            to be specified. If None, it will be inferred from the data. A
            column will be judged as categorical if the number of different
            values is less than 5% of the number of instances.
        name: String. The name of the input node. If unspecified, it will be
            set automatically with the class name.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
        name: Optional[str] = None,
        **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.column_names = column_names
        self.column_types = column_types

    def get_config(self):
        """Return the parent config extended with the column metadata."""
        config = super().get_config()
        config["column_names"] = self.column_names
        config["column_types"] = self.column_types
        return config

    def get_adapter(self):
        """Return the adapter used for structured data input."""
        return adapters.StructuredDataAdapter()

    def get_analyser(self):
        """Return an analyser seeded with the configured column metadata."""
        analyser = analysers.StructuredDataAnalyser(
            self.column_names, self.column_types
        )
        return analyser

    def get_block(self):
        """Return the default block to attach to structured data input."""
        return blocks.StructuredDataBlock()

    def config_from_analyser(self, analyser):
        """Copy the column names and types from the analyser."""
        super().config_from_analyser(analyser)
        self.column_names = analyser.column_names
        # Analyser keeps the specified ones and infer the missing ones.
        self.column_types = analyser.column_types

    def build(self, hp, inputs=None):
        """Return `inputs` unchanged."""
        return inputs

    def get_hyper_preprocessors(self):
        """Return the categorical-to-numerical encoding step."""
        encoder = preprocessors.CategoricalToNumerical(
            column_names=self.column_names,
            column_types=self.column_types,
        )
        return [hyper_preprocessors.DefaultHyperPreprocessor(encoder)]


# ================================================
# FILE: autokeras/nodes_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras import blocks
from autokeras import nodes


def test_input_get_block_return_general_block():
    """A plain Input node is paired with a GeneralBlock."""
    block = nodes.Input().get_block()
    assert isinstance(block, blocks.GeneralBlock)


# ================================================
# FILE: autokeras/pipeline.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import tree

from autokeras import preprocessors as preprocessors_module
from autokeras.engine import hyper_preprocessor as hpps_module
from autokeras.engine import preprocessor as pps_module
from autokeras.utils import io_utils


class HyperPipeline(hpps_module.HyperPreprocessor):
    """A search space consists of HyperPreprocessors.

    # Arguments
        inputs: a list of lists of HyperPreprocessors.
        outputs: a list of lists of HyperPreprocessors.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def _build_preprocessors(hp, hpps_lists, dataset):
        """Build and fit one preprocessor chain per flattened data source.

        Each hyper-preprocessor is built on the data as transformed by the
        preprocessors before it in the same chain.
        """

        def build_chain(hpps_list, data):
            chain = []
            for hyper_preprocessor in hpps_list:
                built = hyper_preprocessor.build(hp, data)
                built.fit(data)
                data = built.transform(data)
                chain.append(built)
            return chain

        return [
            build_chain(hpps_list, source)
            for source, hpps_list in zip(tree.flatten(dataset), hpps_lists)
        ]

    def build(self, hp, dataset):
        """Build a Pipeline by Hyperparameters.

        # Arguments
            hp: Hyperparameters.
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            An instance of Pipeline.
        """
        x, y = dataset
        input_chains = self._build_preprocessors(hp, self.inputs, x)
        output_chains = self._build_preprocessors(hp, self.outputs, y)
        return Pipeline(inputs=input_chains, outputs=output_chains)


def load_pipeline(filepath, custom_objects=None):
    """Load a Pipeline instance from disk."""
    scope_objects = {} if custom_objects is None else custom_objects
    with keras.utils.custom_object_scope(scope_objects):
        config = io_utils.load_json(filepath)
        return Pipeline.from_config(config)


class Pipeline(pps_module.Preprocessor):
    """A data pipeline for transforming the entire dataset.

    # Arguments
        inputs: A list of lists of Preprocessors. For the input datasets for
            the model.
        outputs: A list of lists of Preprocessors. For the target datasets for
            the model.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    def fit(self, dataset):
        """Fit the Preprocessors."""
        x, y = dataset

        # Each preprocessor is fitted on the output of the one before it.
        for chain, data in zip(self.inputs, tree.flatten(x)):
            for step in chain:
                step.fit(data)  # pragma: no cover
                data = step.transform(data)  # pragma: no cover

        for chain, data in zip(self.outputs, tree.flatten(y)):
            for step in chain:
                step.fit(data)
                data = step.transform(data)

        return None

    def transform(self, dataset):
        """Transform the dataset to be ready for the model.

        # Arguments
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            dataset: nested numpy arrays. The input dataset for the model.
        """
        x, y = dataset
        transformed_x = self.transform_x(x)
        transformed_y = self.transform_y(y)
        return (transformed_x, transformed_y)

    def transform_x(self, dataset):
        """Transform the input dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            dataset: nested numpy arrays. The input dataset for the model.
        """
        return self._transform_data(dataset, self.inputs)

    def transform_y(self, dataset):
        """Transform the target dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The target dataset for the model.

        # Returns
            dataset: nested numpy arrays. The target dataset for the model.
        """
        return self._transform_data(dataset, self.outputs)

    def _transform_data(self, dataset, pps_lists):
        """Run every flattened source through its preprocessor chain.

        A single result is returned bare; several are returned as a tuple.
        """
        results = []
        for chain, data in zip(pps_lists, tree.flatten(dataset)):
            for step in chain:
                data = step.transform(data)
            results.append(data)
        return results[0] if len(results) == 1 else tuple(results)

    def save(self, filepath):
        """Write this pipeline's config to `filepath` as JSON."""
        config = self.get_config()
        io_utils.save_json(filepath, config)

    @staticmethod
    def _serialize_chains(chains):
        """Serialize every preprocessor in a list of chains."""
        return [
            [preprocessors_module.serialize(step) for step in chain]
            for chain in chains
        ]

    @staticmethod
    def _deserialize_chains(chain_configs):
        """Deserialize every preprocessor config in a list of chains."""
        return [
            [preprocessors_module.deserialize(step) for step in chain]
            for chain in chain_configs
        ]

    def get_config(self):
        """Return the serialized input and output preprocessor chains."""
        return {
            "inputs": self._serialize_chains(self.inputs),
            "outputs": self._serialize_chains(self.outputs),
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a Pipeline from the dict produced by `get_config`."""
        input_chains = cls._deserialize_chains(config["inputs"])
        output_chains = cls._deserialize_chains(config["outputs"])
        return cls(inputs=input_chains, outputs=output_chains)

    def postprocess(self, y):
        """Postprocess the outputs of the model.

        # Arguments
            y: numpy.ndarray or a list of numpy.ndarrays. The output of the
                Keras model.

        # Returns
            A list or an instance of numpy.ndarray. The postprocessed data for
            the heads.
        """
        results = []
        for data, chain in zip(tree.flatten(y), self.outputs):
            # Undo the target preprocessing in reverse order of application.
            for step in reversed(chain):
                if not isinstance(step, pps_module.TargetPreprocessor):
                    continue
                data = step.postprocess(data)
            results.append(data)
        return results[0] if len(results) == 1 else results


# ================================================
# FILE: autokeras/pipeline_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import pipeline as pipeline_module
from autokeras import preprocessors


def test_pipeline_postprocess_one_hot_to_labels():
    """A single one-hot output is decoded back into a column of labels."""
    encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    pipeline = pipeline_module.Pipeline(inputs=[[]], outputs=[[encoder]])

    decoded = pipeline.postprocess(np.eye(3))

    assert np.array_equal(decoded, [["a"], ["b"], ["c"]])


def test_pipeline_postprocess_multiple_one_hot_to_labels():
    """Each of two one-hot outputs is decoded by its own encoder."""
    output_chains = [
        [preprocessors.OneHotEncoder(["a", "b", "c"])],
        [preprocessors.OneHotEncoder(["a", "b", "c"])],
    ]
    pipeline = pipeline_module.Pipeline(inputs=[[]], outputs=output_chains)

    result = pipeline.postprocess([np.eye(3), np.eye(3)])

    assert np.array_equal(result[0], [["a"], ["b"], ["c"]])
    assert np.array_equal(result[1], [["a"], ["b"], ["c"]])


# ================================================
# FILE: autokeras/preprocessors/__init__.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras

from autokeras.preprocessors.common import AddOneDimension
from autokeras.preprocessors.common import CastToInt32
from autokeras.preprocessors.common import CastToString
from autokeras.preprocessors.common import TextTokenizer
from autokeras.preprocessors.encoders import CategoricalToNumerical
from autokeras.preprocessors.encoders import LabelEncoder
from autokeras.preprocessors.encoders import OneHotEncoder
from autokeras.preprocessors.postprocessors import SigmoidPostprocessor
from autokeras.preprocessors.postprocessors import SoftmaxPostprocessor
from autokeras.utils import utils


def serialize(preprocessor):
    """Serialize a preprocessor with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(preprocessor)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild a preprocessor from `config`.

    Class names are resolved against the names imported into this package.
    """
    lookup = {
        "module_objects": globals(),
        "custom_objects": custom_objects,
    }
    return utils.deserialize_keras_object(config, **lookup)


# ================================================
# FILE: autokeras/preprocessors/common.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import Counter

import keras
import numpy as np

from autokeras.engine import preprocessor


@keras.utils.register_keras_serializable(package="autokeras")
class AddOneDimension(preprocessor.Preprocessor):
    """Append one dimension of size one to the dataset shape."""

    def transform(self, dataset):
        """Return `dataset` with a new trailing axis of size one."""
        return np.expand_dims(dataset, axis=-1)


@keras.utils.register_keras_serializable(package="autokeras")
class CastToInt32(preprocessor.Preprocessor):
    """Cast the dataset to int32."""

    def transform(self, dataset):
        """Return `dataset` converted to dtype int32."""
        return dataset.astype("int32")


@keras.utils.register_keras_serializable(package="autokeras")
class CastToString(preprocessor.Preprocessor):
    """Cast the dataset to string."""

    def transform(self, dataset):
        """Return `dataset` as an array of unicode strings.

        # Arguments
            dataset: numpy.ndarray of any shape.

        # Returns
            numpy.ndarray of str with the same shape. Byte strings are decoded
            as UTF-8, ignoring undecodable bytes; every other dtype goes
            through `astype("str")`.
        """
        if np.issubdtype(dataset.dtype, np.bytes_):
            # Decode element-wise so that arrays of any rank are supported,
            # not only one-dimensional ones.
            return np.char.decode(dataset, "utf-8", errors="ignore")
        return dataset.astype("str")


@keras.utils.register_keras_serializable(package="autokeras")
class TextTokenizer(preprocessor.Preprocessor):
    """Simple text tokenizer that converts strings to integer sequences.

    # Arguments
        max_len: Int. Length of every output sequence. Longer texts are
            truncated and shorter ones are right-padded with 0.
        vocab: Dict mapping words to integer ids, or None. It is replaced
            when `fit` is called.
        max_vocab: Int. Maximum number of words kept in the vocabulary.
    """

    def __init__(self, max_len=100, vocab=None, max_vocab=500, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab = vocab
        self.max_vocab = max_vocab

    def fit(self, dataset):
        """Build the vocabulary from the most frequent words in `dataset`.

        Texts are split on whitespace. The `max_vocab` most frequent words
        get the ids 1, 2, ... in order of decreasing frequency.
        """
        word_counts = Counter(
            word for text in dataset for word in text.split()
        )
        sorted_words = sorted(word_counts, key=word_counts.get, reverse=True)
        # Ids start from 1; 0 is used for padding and unknown words.
        self.vocab = {
            word: idx + 1
            for idx, word in enumerate(sorted_words[: self.max_vocab])
        }

    def transform(self, dataset):
        """Convert each text into a fixed-length sequence of word ids.

        # Arguments
            dataset: numpy.ndarray of strings.

        # Returns
            numpy.ndarray of int32 with one row of `max_len` ids per text.
        """
        sequences = []
        for text in dataset:
            words = text.split()[: self.max_len]
            seq = [self.vocab.get(word, 0) for word in words]  # 0 for unknown
            # Pad with 0s up to max_len.
            seq += [0] * (self.max_len - len(seq))
            sequences.append(seq)
        return np.array(sequences, dtype=np.int32)

    def get_config(self):
        """Return the parent config plus the tokenizer settings and vocab."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab": self.vocab,
                "max_vocab": self.max_vocab,
            }
        )
        return config


# ================================================
# FILE: autokeras/preprocessors/common_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import test_utils
from autokeras.preprocessors import common


def test_cast_to_int32_return_int32():
    x = test_utils.generate_one_hot_labels(100, 10)
    x = x.astype("uint8")
    x = common.CastToInt32().transform(x)
    assert x.dtype == "int32"


def test_cast_to_string_with_bytes():
    x = np.array([b"hello", b"world"])
    result = common.CastToString().transform(x)
    assert result.dtype.kind in ["U", "S"]  # Unicode or byte string
    assert result[0] == "hello"
    assert result[1] == "world"


def test_cast_to_string_with_2d_bytes():
    x = np.array([[b"hello"], [b"world"]])
    result = common.CastToString().transform(x)
    assert result.shape == (2, 1)
    assert result.dtype.kind == "U"
    assert result[0][0] == "hello"
    assert result[1][0] == "world"


def test_cast_to_string_with_strings():
    x = np.array(["hello", "world"])
    result = common.CastToString().transform(x)
    assert result.dtype.kind in ["U", "S"]
    assert result[0] == "hello"
    assert result[1] == "world"


def test_text_tokenizer_vocab_limit():
    x = np.array(["word1 word2 word3", "word1 word4 word5"])
    tokenizer = common.TextTokenizer(max_vocab=2)
    tokenizer.fit(x)
    # Only the kept words are stored; padding and unknown words share id 0
    # and have no entry in the vocabulary.
    assert len(tokenizer.vocab) == 2
    # word1 should be most frequent
    assert "word1" in tokenizer.vocab
    assert tokenizer.vocab["word1"] == 1


def test_text_tokenizer_transform():
    x = np.array(["hello world", "hello"])
    tokenizer = common.TextTokenizer(max_vocab=10)
    tokenizer.fit(x)
    result = tokenizer.transform(x)
    assert result.shape == (2, 100)  # max_len=100
    assert result.dtype == np.int32
    assert result[0][0] == tokenizer.vocab.get("hello", 0)


# ================================================
# FILE: autokeras/preprocessors/encoders.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import numpy as np

from autokeras import analysers
from autokeras.engine import preprocessor


@keras.utils.register_keras_serializable(package="autokeras")
class Encoder(preprocessor.TargetPreprocessor):
    """Transform labels to encodings.

    # Arguments
        labels: A list of labels of any type. The labels to be encoded. They
            are stored as strings; byte strings are decoded as UTF-8.
    """

    def __init__(self, labels, **kwargs):
        super().__init__(**kwargs)
        self.labels = [self._to_text(label) for label in labels]

    @staticmethod
    def _to_text(label):
        """Return `label` as str, decoding byte strings as UTF-8."""
        if isinstance(label, bytes):
            return label.decode("utf-8")
        return str(label)

    def get_config(self):
        """Return the constructor arguments needed to rebuild the encoder."""
        return {"labels": self.labels}

    def fit(self, dataset):
        """No-op: the labels are given to the constructor."""
        return

    def transform(self, dataset):
        """Transform labels to integer encodings.

        Values are normalized the same way as the labels given to the
        constructor, so byte-string datasets match their decoded labels.
        Values that are not known labels are encoded as `len(self.labels)`.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        unknown_idx = len(self.labels)
        return np.array(
            [
                label_to_idx.get(self._to_text(label), unknown_idx)
                for label in dataset.flatten()
            ]
        ).reshape(dataset.shape)


@keras.utils.register_keras_serializable(package="autokeras")
class OneHotEncoder(Encoder):
    """Transform the labels to one-hot encodings."""

    def transform(self, dataset):
        """Transform labels to one-hot encodings.

        Rows whose label is unknown are left as all zeros.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed. Its last
                axis must have size one.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        dataset = super().transform(dataset)
        encoded_int = dataset.squeeze(axis=-1)
        num_labels = len(self.labels)
        result = np.zeros((len(encoded_int), num_labels))
        for i, idx in enumerate(encoded_int):
            if idx < num_labels:
                result[i, idx] = 1.0
        return result

    def postprocess(self, data):
        """Transform probabilities back to labels.

        # Arguments
            data: numpy.ndarray. The output probabilities of the
                classification head.

        # Returns
            numpy.ndarray. The original labels.
        """
        num_labels = len(self.labels)
        return np.array(
            [
                self.labels[idx] if idx < num_labels else "unknown"
                for idx in np.argmax(np.array(data), axis=1)
            ]
        ).reshape(-1, 1)


@keras.utils.register_keras_serializable(package="autokeras")
class LabelEncoder(Encoder):
    """Transform the labels to integer encodings."""

    def transform(self, dataset):
        """Transform labels to integer encodings.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        dataset = super().transform(dataset)
        return dataset

    def postprocess(self, data):
        """Transform integer encodings back to labels.

        The first value of every row is rounded to the nearest integer and
        used as the label index. Indices outside `[0, len(labels))` yield
        "unknown"; a negative index must not wrap around to the last label.

        # Arguments
            data: numpy.ndarray. The output of the classification head.

        # Returns
            numpy.ndarray. The original labels.
        """
        num_labels = len(self.labels)

        def decode(row):
            idx = int(round(row[0]))
            if 0 <= idx < num_labels:
                return self.labels[idx]
            return "unknown"

        return np.array([decode(row) for row in np.array(data)]).reshape(
            -1, 1
        )


@keras.utils.register_keras_serializable()
class CategoricalToNumerical(preprocessor.Preprocessor):
    """Encode the categorical features to numerical features.

    # Arguments
        column_names: A list of strings specifying the names of the columns.
            The length of the list should be equal to the number of columns
            of the data.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of
            that column. Defaults to None. If not None, the column_names need
            to be specified. If None, it will be inferred from the data.
    """

    # TODO: Support one-hot encoding.
    # TODO: Support frequency encoding.

    def __init__(self, column_names, column_types, **kwargs):
        super().__init__(**kwargs)
        self.column_names = column_names
        self.column_types = column_types
        # One encoding name per column: "int" for categorical, else "none".
        self.encoding = []
        for column_name in self.column_names:
            column_type = self.column_types[column_name]
            if column_type == analysers.CATEGORICAL:
                # TODO: Search to use one-hot or int.
                self.encoding.append("int")
            else:
                self.encoding.append("none")
        # One encoder slot per column; stays None for "none" columns.
        self.encoders = [None] * len(self.encoding)

    def fit(self, dataset):
        """Create one encoder per encoded column from its unique values."""
        for i, enc_type in enumerate(self.encoding):
            if enc_type == "none":
                continue
            column_data = dataset[:, i]
            unique_labels = np.unique(column_data)
            if enc_type == "int":
                self.encoders[i] = LabelEncoder(unique_labels)
            elif enc_type == "one_hot":  # pragma: no cover
                self.encoders[i] = OneHotEncoder(  # pragma: no cover
                    unique_labels
                )
            else:
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoding: {enc_type}"
                )

    def transform(self, dataset):
        """Encode the columns and return one float32 array.

        Columns with encoding "none" are cast to float32 with NaN replaced
        by 0; the others go through the encoder created in `fit`.
        """
        outputs = []
        for i, enc_type in enumerate(self.encoding):
            # Slice rather than index so the column stays two-dimensional.
            column_data = dataset[:, i : i + 1]
            if enc_type == "none":
                column_data = column_data.astype("float32")
                # Replace NaN with 0.
                imputed = np.where(np.isnan(column_data), 0, column_data)
                outputs.append(imputed)
            else:
                encoded = self.encoders[i].transform(column_data)
                outputs.append(encoded)
        return np.concatenate(outputs, axis=1).astype("float32")

    def get_config(self):
        """Return the column metadata, encodings and encoder configs."""
        encoders_config = []
        for enc in self.encoders:
            if enc is None:
                encoders_config.append(
                    {"encoder_type": None, "encoder_config": None}
                )
            else:
                encoder_type = (
                    "label" if isinstance(enc, LabelEncoder) else "one_hot"
                )
                encoders_config.append(
                    {
                        "encoder_type": encoder_type,
                        "encoder_config": enc.get_config(),
                    }
                )
        return {
            "column_types": self.column_types,
            "column_names": self.column_names,
            "encoding": self.encoding,
            "encoders": encoders_config,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild the preprocessor from the dict made by `get_config`.

        # Raises
            ValueError: If an encoder entry has an unsupported type. Skipping
                it would shift the later encoders onto the wrong columns.
        """
        obj = cls(config["column_names"], config["column_types"])
        obj.encoding = config["encoding"]
        obj.encoders = []
        for item in config["encoders"]:
            encoder_type = item["encoder_type"]
            if encoder_type is None:
                obj.encoders.append(None)
            elif encoder_type == "label":
                obj.encoders.append(LabelEncoder(**item["encoder_config"]))
            elif encoder_type == "one_hot":  # pragma: no cover
                obj.encoders.append(  # pragma: no cover
                    OneHotEncoder(**item["encoder_config"])
                )
            else:
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoder type: {encoder_type}"
                )
        return obj


# ================================================
# FILE: autokeras/preprocessors/encoders_test.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np from autokeras import preprocessors from autokeras.preprocessors import encoders from autokeras.utils import data_utils def test_one_hot_encoder_deserialize_transforms_to_np(): encoder = encoders.OneHotEncoder(["a", "b", "c"]) encoder.fit(np.array(["a", "b", "a"])) encoder = preprocessors.deserialize(preprocessors.serialize(encoder)) one_hot = encoder.transform(np.array([["a"], ["c"], ["b"]])) assert one_hot.shape[1:] == (3,) def test_one_hot_encoder_decode_to_same_string(): encoder = encoders.OneHotEncoder(["a", "b", "c"]) result = encoder.postprocess(np.eye(3)) assert np.array_equal(result, np.array([["a"], ["b"], ["c"]])) def test_label_encoder_decode_to_same_string(): encoder = encoders.LabelEncoder(["a", "b"]) result = encoder.postprocess([[0], [1]]) assert np.array_equal(result, np.array([["a"], ["b"]])) def test_label_encoder_encode_to_correct_shape(): encoder = encoders.LabelEncoder(["a", "b"]) dataset = np.array([["a"], ["b"]]) result = encoder.transform(dataset) print(data_utils.dataset_shape(result)) assert np.array_equal(data_utils.dataset_shape(result), [2, 1]) ================================================ FILE: autokeras/preprocessors/postprocessors.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import keras import numpy as np from autokeras.engine import preprocessor @keras.utils.register_keras_serializable(package="autokeras") class PostProcessor(preprocessor.TargetPreprocessor): def transform(self, dataset): return dataset @keras.utils.register_keras_serializable(package="autokeras") class SigmoidPostprocessor(PostProcessor): """Postprocessor for sigmoid outputs.""" def postprocess(self, data): """Transform probabilities to zeros and ones. # Arguments data: numpy.ndarray. The output probabilities of the classification head. # Returns numpy.ndarray. The zeros and ones predictions. """ data[data < 0.5] = 0 data[data > 0.5] = 1 return data @keras.utils.register_keras_serializable(package="autokeras") class SoftmaxPostprocessor(PostProcessor): """Postprocessor for softmax outputs.""" def postprocess(self, data): """Transform probabilities to zeros and ones. # Arguments data: numpy.ndarray. The output probabilities of the classification head. # Returns numpy.ndarray. The zeros and ones predictions. """ idx = np.argmax(data, axis=-1) data = np.zeros(data.shape) data[np.arange(data.shape[0]), idx] = 1 return data ================================================ FILE: autokeras/preprocessors/postprocessors_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np from autokeras import preprocessors from autokeras.preprocessors import postprocessors def test_sigmoid_postprocess_to_zero_one(): postprocessor = postprocessors.SigmoidPostprocessor() y = postprocessor.postprocess(np.random.rand(10, 3)) assert set(y.flatten().tolist()) == set([1, 0]) def test_sigmoid_deserialize_without_error(): postprocessor = postprocessors.SigmoidPostprocessor() dataset = np.array([1, 2]) postprocessor = preprocessors.deserialize( preprocessors.serialize(postprocessor) ) assert isinstance(postprocessor.transform(dataset), np.ndarray) def test_softmax_postprocess_to_zero_one(): postprocessor = postprocessors.SoftmaxPostprocessor() y = postprocessor.postprocess(np.random.rand(10, 3)) assert set(y.flatten().tolist()) == set([1, 0]) def test_softmax_transform_dataset_doesnt_change(): postprocessor = postprocessors.SoftmaxPostprocessor() dataset = np.array([1, 2]) assert isinstance(postprocessor.transform(dataset), np.ndarray) def test_softmax_deserialize_without_error(): postprocessor = postprocessors.SoftmaxPostprocessor() dataset = np.array([1, 2]) postprocessor = preprocessors.deserialize( preprocessors.serialize(postprocessor) ) assert isinstance(postprocessor.transform(dataset), np.ndarray) ================================================ FILE: autokeras/tasks/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from autokeras.tasks.image import ImageClassifier from autokeras.tasks.image import ImageRegressor from autokeras.tasks.structured_data import StructuredDataClassifier from autokeras.tasks.structured_data import StructuredDataRegressor from autokeras.tasks.text import TextClassifier from autokeras.tasks.text import TextRegressor ================================================ FILE: autokeras/tasks/image.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pathlib import Path from typing import List from typing import Optional from typing import Tuple from typing import Type from typing import Union import keras from autokeras import auto_model from autokeras import blocks from autokeras import nodes as input_module from autokeras.engine import tuner from autokeras.tuners import greedy from autokeras.tuners import task_specific from autokeras.utils import types class SupervisedImagePipeline(auto_model.AutoModel): def __init__(self, outputs, **kwargs): super().__init__( inputs=input_module.ImageInput(), outputs=outputs, **kwargs ) class ImageClassifier(SupervisedImagePipeline): """AutoKeras image classification class. # Arguments num_classes: Int. Defaults to None. If None, it will be inferred from the data. multi_label: Boolean. Defaults to False. loss: A Keras loss function. Defaults to use 'binary_crossentropy' or 'categorical_crossentropy' based on the number of classes. 
metrics: A list of Keras metrics. Defaults to use 'accuracy'. project_name: String. The name of the AutoModel. Defaults to 'image_classifier'. max_trials: Int. The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'. tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. 
""" def __init__( self, num_classes: Optional[int] = None, multi_label: bool = False, loss: types.LossType = None, metrics: Optional[types.MetricsType] = None, project_name: str = "image_classifier", max_trials: int = 100, directory: Union[str, Path, None] = None, objective: str = "val_loss", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = task_specific.ImageClassifierTuner super().__init__( outputs=blocks.ClassificationHead( num_classes=num_classes, multi_label=multi_label, loss=loss, metrics=metrics, ), max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) def fit( self, x: Optional[types.DatasetType] = None, y: Optional[types.DatasetType] = None, epochs: Optional[int] = None, callbacks: Optional[List[keras.callbacks.Callback]] = None, validation_split: Optional[float] = 0.2, validation_data: Union[ Tuple[types.DatasetType, types.DatasetType], None ] = None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. It will search for the best model based on the performances on validation data. # Arguments x: numpy.ndarray. Training data x. The shape of the data should be (samples, width, height) or (samples, width, height, channels). y: numpy.ndarray. Training data y. It can be raw labels, one-hot encoded if more than two classes, or binary encoded for binary classification. epochs: Int. The number of epochs to train each model during the search. If unspecified, by default we train for a maximum of 1000 epochs, but we stop training if the validation loss stops improving for 10 epochs (unless you specified an EarlyStopping callback as part of the callbacks argument, in which case the EarlyStopping callback you specified will determine early stopping). 
callbacks: List of Keras callbacks to apply during training and validation. validation_split: Float between 0 and 1. Defaults to 0.2. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. The best model found would be fit on the entire dataset including the validation data. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. The best model found would be fit on the training dataset without the validation data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history class ImageRegressor(SupervisedImagePipeline): """AutoKeras image regression class. # Arguments output_dim: Int. The number of output dimensions. Defaults to None. If None, it will be inferred from the data. loss: A Keras loss function. Defaults to use 'mean_squared_error'. metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'. project_name: String. The name of the AutoModel. Defaults to 'image_regressor'. max_trials: Int. 
The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'. tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. 
""" def __init__( self, output_dim: Optional[int] = None, loss: types.LossType = "mean_squared_error", metrics: Optional[types.MetricsType] = None, project_name: str = "image_regressor", max_trials: int = 100, directory: Union[str, Path, None] = None, objective: str = "val_loss", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = greedy.Greedy super().__init__( outputs=blocks.RegressionHead( output_dim=output_dim, loss=loss, metrics=metrics ), max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) def fit( self, x: Optional[types.DatasetType] = None, y: Optional[types.DatasetType] = None, epochs: Optional[int] = None, callbacks: Optional[List[keras.callbacks.Callback]] = None, validation_split: Optional[float] = 0.2, validation_data: Union[ types.DatasetType, Tuple[types.DatasetType], None ] = None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. It will search for the best model based on the performances on validation data. # Arguments x: numpy.ndarray. Training data x. The shape of the data should be (samples, width, height) or (samples, width, height, channels). y: numpy.ndarray. Training data y. The targets passing to the head would have to be np.ndarray. It can be single-column or multi-column. The values should all be numerical. epochs: Int. The number of epochs to train each model during the search. If unspecified, by default we train for a maximum of 1000 epochs, but we stop training if the validation loss stops improving for 10 epochs (unless you specified an EarlyStopping callback as part of the callbacks argument, in which case the EarlyStopping callback you specified will determine early stopping). callbacks: List of Keras callbacks to apply during training and validation. 
validation_split: Float between 0 and 1. Defaults to 0.2. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. The best model found would be fit on the entire dataset including the validation data. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. The best model found would be fit on the training dataset without the validation data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history ================================================ FILE: autokeras/tasks/image_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from unittest import mock import autokeras as ak from autokeras import test_utils @mock.patch("autokeras.AutoModel.fit") def test_img_clf_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.ImageClassifier(directory=tmp_path, seed=test_utils.SEED) auto_model.fit( x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)), y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=10), ) assert fit.is_called @mock.patch("autokeras.AutoModel.fit") def test_img_reg_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.ImageRegressor(directory=tmp_path, seed=test_utils.SEED) auto_model.fit( x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)), y=test_utils.generate_data(num_instances=100, shape=(1,)), ) assert fit.is_called ================================================ FILE: autokeras/tasks/structured_data.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import pathlib from typing import Dict from typing import List from typing import Optional from typing import Type from typing import Union import tree from autokeras import auto_model from autokeras import blocks from autokeras import nodes as input_module from autokeras.engine import tuner from autokeras.tuners import task_specific from autokeras.utils import types class BaseStructuredDataPipeline(auto_model.AutoModel): def __init__(self, inputs, outputs, **kwargs): self.check(inputs.column_names, inputs.column_types) super().__init__(inputs=inputs, outputs=outputs, **kwargs) self._target_col_name = None def check(self, column_names, column_types): if column_types: for column_type in column_types.values(): if column_type not in ["categorical", "numerical"]: raise ValueError( 'column_types should be either "categorical" ' 'or "numerical", but got {name}'.format( name=column_type ) ) def check_in_fit(self, x): input_node = tree.flatten(self.inputs)[0] if input_node.column_names and input_node.column_types: for column_name in input_node.column_types: if column_name not in input_node.column_names: raise ValueError( "column_names and column_types are " "mismatched. Cannot find column name " "{name} in the data.".format(name=column_name) ) def fit( self, x=None, y=None, epochs=None, callbacks=None, validation_split=0.2, validation_data=None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. # Arguments x: numpy.ndarray. Training data x. It can be string or numerical data. y: numpy.ndarray. Training data y. It can be raw labels, one-hot encoded if more than two classes, or binary encoded for binary classification. epochs: Int. The number of epochs to train each model during the search. If unspecified, we would use epochs equal to 1000 and early stopping with patience equal to 10. callbacks: List of Keras callbacks to apply during training and validation. validation_split: Float between 0 and 1. Defaults to 0.2. 
Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. The best model found would be fit on the training dataset without the validation data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ self.check_in_fit(x) history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history def predict(self, x, **kwargs): """Predict the output for a given testing data. # Arguments x: numpy.ndarray. Testing data x. It can be string or numerical data. **kwargs: Any arguments supported by keras.Model.predict. # Returns A list of numpy.ndarray objects or a single numpy.ndarray. The predicted results. """ return super().predict(x=x, **kwargs) def evaluate(self, x, y=None, **kwargs): """Evaluate the best model for the given data. # Arguments x: numpy.ndarray. Testing data x. It can be string or numerical data. y: numpy.ndarray. Testing data y. It can be string labels or numerical values. **kwargs: Any arguments supported by keras.Model.evaluate. 
# Returns Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute model.metrics_names will give you the display labels for the scalar outputs. """ return super().evaluate(x=x, y=y, **kwargs) class SupervisedStructuredDataPipeline(BaseStructuredDataPipeline): def __init__(self, outputs, column_names, column_types, **kwargs): inputs = input_module.StructuredDataInput( column_names=column_names, column_types=column_types ) super().__init__(inputs=inputs, outputs=outputs, **kwargs) class StructuredDataClassifier(SupervisedStructuredDataPipeline): """AutoKeras structured data classification class. # Arguments column_names: A list of strings specifying the names of the columns. The length of the list should be equal to the number of columns of the data excluding the target column. Defaults to None. column_types: Dict. The keys are the column names. The values should either be 'numerical' or 'categorical', indicating the type of that column. Defaults to None. If not None, the column_names need to be specified. If None, it will be inferred from the data. num_classes: Int. Defaults to None. If None, it will be inferred from the data. multi_label: Boolean. Defaults to False. loss: A Keras loss function. Defaults to use 'binary_crossentropy' or 'categorical_crossentropy' based on the number of classes. metrics: A list of Keras metrics. Defaults to use 'accuracy'. project_name: String. The name of the AutoModel. Defaults to 'structured_data_classifier'. max_trials: Int. The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize. Defaults to 'val_accuracy'. 
tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. """ def __init__( self, column_names: Optional[List[str]] = None, column_types: Optional[Dict] = None, num_classes: Optional[int] = None, multi_label: bool = False, loss: Optional[types.LossType] = None, metrics: Optional[types.MetricsType] = None, project_name: str = "structured_data_classifier", max_trials: int = 100, directory: Optional[Union[str, pathlib.Path]] = None, objective: str = "val_accuracy", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = task_specific.StructuredDataClassifierTuner super().__init__( outputs=blocks.ClassificationHead( num_classes=num_classes, multi_label=multi_label, loss=loss, metrics=metrics, ), column_names=column_names, column_types=column_types, max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) def fit( self, x=None, y=None, epochs=None, callbacks=None, validation_split=0.2, validation_data=None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. # Arguments x: numpy.ndarray. Training data x. y: numpy.ndarray. Training data y. epochs: Int. The number of epochs to train each model during the search. 
If unspecified, we would use epochs equal to 1000 and early stopping with patience equal to 10. callbacks: List of Keras callbacks to apply during training and validation. validation_split: Float between 0 and 1. Defaults to 0.2. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history class StructuredDataRegressor(SupervisedStructuredDataPipeline): """AutoKeras structured data regression class. # Arguments column_names: A list of strings specifying the names of the columns. The length of the list should be equal to the number of columns of the data excluding the target column. Defaults to None. column_types: Dict. The keys are the column names. The values should either be 'numerical' or 'categorical', indicating the type of that column. Defaults to None. If not None, the column_names need to be specified. 
If None, it will be inferred from the data. output_dim: Int. The number of output dimensions. Defaults to None. If None, it will be inferred from the data. loss: A Keras loss function. Defaults to use 'mean_squared_error'. metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'. project_name: String. The name of the AutoModel. Defaults to 'structured_data_regressor'. max_trials: Int. The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'. tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. 
""" def __init__( self, column_names: Optional[List[str]] = None, column_types: Optional[Dict[str, str]] = None, output_dim: Optional[int] = None, loss: types.LossType = "mean_squared_error", metrics: Optional[types.MetricsType] = None, project_name: str = "structured_data_regressor", max_trials: int = 100, directory: Union[str, pathlib.Path, None] = None, objective: str = "val_loss", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = task_specific.StructuredDataRegressorTuner super().__init__( outputs=blocks.RegressionHead( output_dim=output_dim, loss=loss, metrics=metrics ), column_names=column_names, column_types=column_types, max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) ================================================ FILE: autokeras/tasks/structured_data_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from unittest import mock

import numpy as np
import pandas as pd
import pytest

import autokeras as ak
from autokeras import test_utils


def test_raise_error_unknown_str_in_col_type(tmp_path):
    """An unsupported column type string ("num") is rejected on construction."""
    with pytest.raises(ValueError) as info:
        ak.StructuredDataClassifier(
            column_types={"age": "num", "parch": "categorical"},
            directory=tmp_path,
            seed=test_utils.SEED,
        )

    assert 'column_types should be either "categorical"' in str(info.value)


def test_structured_data_input_name_type_mismatch_error(tmp_path):
    """column_types keys that do not match column_names raise a ValueError."""
    with pytest.raises(ValueError) as info:
        clf = ak.StructuredDataClassifier(
            column_types={"_age": "numerical", "parch": "categorical"},
            column_names=["age", "fare"],
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        clf.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")

    assert "column_names and column_types are mismatched." in str(info.value)


def test_structured_data_col_type_no_name_error(tmp_path):
    """Giving column_types without column_names for array input is an error."""
    with pytest.raises(ValueError) as info:
        clf = ak.StructuredDataClassifier(
            column_types={"age": "numerical", "parch": "categorical"},
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        clf.fit(x=np.random.rand(100, 30), y=np.random.rand(100, 1))

    assert "column_names must be specified" in str(info.value)


@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.evaluate")
def test_structured_clf_evaluate_call_automodel_evaluate(
    evaluate, fit, tmp_path
):
    """Evaluating from CSV paths must delegate to AutoModel.evaluate."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.evaluate(x=test_utils.TRAIN_CSV_PATH, y="survived")

    # `called` is the real Mock flag. A misspelled attribute such as
    # `is_called` is auto-created by Mock and is always truthy, so the
    # assertion could never fail.
    assert evaluate.called


@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.predict")
def test_structured_clf_predict_csv_call_automodel_predict(
    predict, fit, tmp_path
):
    """Predicting from a CSV path must delegate to AutoModel.predict."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.predict(x=test_utils.TEST_CSV_PATH)

    # See the note in the evaluate test: `is_called` is not a Mock API.
    assert predict.called
@mock.patch("autokeras.AutoModel.fit") def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.StructuredDataClassifier( directory=tmp_path, seed=test_utils.SEED ) auto_model.fit( x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100], y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=3), ) assert fit.is_called @mock.patch("autokeras.AutoModel.fit") def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.StructuredDataRegressor( directory=tmp_path, seed=test_utils.SEED ) auto_model.fit( x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100], y=test_utils.generate_data(num_instances=100, shape=(1,)), ) assert fit.is_called ================================================ FILE: autokeras/tasks/text.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from pathlib import Path from typing import Optional from typing import Type from typing import Union from autokeras import auto_model from autokeras import blocks from autokeras import nodes as input_module from autokeras.engine import tuner from autokeras.tuners import greedy from autokeras.tuners import task_specific from autokeras.utils import types class SupervisedTextPipeline(auto_model.AutoModel): def __init__(self, outputs, **kwargs): super().__init__( inputs=input_module.TextInput(), outputs=outputs, **kwargs ) class TextClassifier(SupervisedTextPipeline): """AutoKeras text classification class. # Arguments num_classes: Int. Defaults to None. If None, it will be inferred from the data. multi_label: Boolean. Defaults to False. loss: A Keras loss function. Defaults to use 'binary_crossentropy' or 'categorical_crossentropy' based on the number of classes. metrics: A list of Keras metrics. Defaults to use 'accuracy'. project_name: String. The name of the AutoModel. Defaults to 'text_classifier'. max_trials: Int. The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'. tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. 
Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. """ def __init__( self, num_classes: Optional[int] = None, multi_label: bool = False, loss: types.LossType = None, metrics: Optional[types.MetricsType] = None, project_name: str = "text_classifier", max_trials: int = 100, directory: Union[str, Path, None] = None, objective: str = "val_loss", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = task_specific.TextClassifierTuner super().__init__( outputs=blocks.ClassificationHead( num_classes=num_classes, multi_label=multi_label, loss=loss, metrics=metrics, ), max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) def fit( self, x=None, y=None, epochs=None, callbacks=None, validation_split=0.2, validation_data=None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. It will search for the best model based on the performances on validation data. # Arguments x: numpy.ndarray. Training data x. The input data should be numpy.ndarray. The data should be one dimensional. Each element in the data should be a string which is a full sentence. y: numpy.ndarray. Training data y. It can be raw labels, one-hot encoded if more than two classes, or binary encoded for binary classification. epochs: Int. The number of epochs to train each model during the search. If unspecified, by default we train for a maximum of 1000 epochs, but we stop training if the validation loss stops improving for 10 epochs (unless you specified an EarlyStopping callback as part of the callbacks argument, in which case the EarlyStopping callback you specified will determine early stopping). 
callbacks: List of Keras callbacks to apply during training and validation. validation_split: Float between 0 and 1. Defaults to 0.2. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. The best model found would be fit on the entire dataset including the validation data. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. The best model found would be fit on the training dataset without the validation data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history class TextRegressor(SupervisedTextPipeline): """AutoKeras text regression class. # Arguments output_dim: Int. The number of output dimensions. Defaults to None. If None, it will be inferred from the data. loss: A Keras loss function. Defaults to use 'mean_squared_error'. metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'. project_name: String. The name of the AutoModel. Defaults to 'text_regressor'. max_trials: Int. 
The maximum number of different Keras Models to try. The search may finish before reaching the max_trials. Defaults to 100. directory: String. The path to a directory for storing the search outputs. Defaults to None, which would create a folder with the name of the AutoModel in the current directory. objective: String. Name of model metric to minimize or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'. tuner: String or subclass of AutoTuner. If string, it should be one of 'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a subclass of AutoTuner. If left unspecified, it uses a task specific tuner, which first evaluates the most commonly used models for the task before exploring other models. overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing project of the same name if one is found. Otherwise, overwrites the project. seed: Int. Random seed. max_model_size: Int. Maximum number of scalars in the parameters of a model. Models larger than this are rejected. **kwargs: Any arguments supported by AutoModel. """ def __init__( self, output_dim: Optional[int] = None, loss: types.LossType = "mean_squared_error", metrics: Optional[types.MetricsType] = None, project_name: str = "text_regressor", max_trials: int = 100, directory: Union[str, Path, None] = None, objective: str = "val_loss", tuner: Union[str, Type[tuner.AutoTuner]] = None, overwrite: bool = False, seed: Optional[int] = None, max_model_size: Optional[int] = None, **kwargs ): if tuner is None: tuner = greedy.Greedy super().__init__( outputs=blocks.RegressionHead( output_dim=output_dim, loss=loss, metrics=metrics ), max_trials=max_trials, directory=directory, project_name=project_name, objective=objective, tuner=tuner, overwrite=overwrite, seed=seed, max_model_size=max_model_size, **kwargs ) def fit( self, x=None, y=None, epochs=None, callbacks=None, validation_split=0.2, validation_data=None, **kwargs ): """Search for the best model and hyperparameters for the AutoModel. 
It will search for the best model based on the performances on validation data. # Arguments x: numpy.ndarray. Training data x. The input data should be numpy.ndarray. The data should be one dimensional. Each element in the data should be a string which is a full sentence. y: numpy.ndarray. Training data y. The targets passing to the head would have to be np.ndarray,. It can be single-column or multi-column. The values should all be numerical. epochs: Int. The number of epochs to train each model during the search. If unspecified, by default we train for a maximum of 1000 epochs, but we stop training if the validation loss stops improving for 10 epochs (unless you specified an EarlyStopping callback as part of the callbacks argument, in which case the EarlyStopping callback you specified will determine early stopping). callbacks: List of Keras callbacks to apply during training and validation. validation_split: Float between 0 and 1. Defaults to 0.2. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is not supported when `x` is a dataset. The best model found would be fit on the entire dataset including the validation data. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. The type of the validation data should be the same as the training data. The best model found would be fit on the training dataset without the validation data. **kwargs: Any arguments supported by [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method). # Returns history: A Keras History object corresponding to the best model. 
Its History.history attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). """ history = super().fit( x=x, y=y, epochs=epochs, callbacks=callbacks, validation_split=validation_split, validation_data=validation_data, **kwargs ) return history ================================================ FILE: autokeras/tasks/text_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from unittest import mock import numpy as np import autokeras as ak from autokeras import test_utils @mock.patch("autokeras.AutoModel.fit") def test_txt_clf_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.TextClassifier(directory=tmp_path, seed=test_utils.SEED) auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1, 2])) assert fit.is_called @mock.patch("autokeras.AutoModel.fit") def test_txt_reg_fit_call_auto_model_fit(fit, tmp_path): auto_model = ak.TextRegressor(directory=tmp_path, seed=test_utils.SEED) auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1.0, 2.0])) assert fit.is_called ================================================ FILE: autokeras/test_utils.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import os

import keras
import numpy as np

import autokeras as ak

# Seed shared by the test helpers and the tests.
SEED = 5

# Column name -> column type for the structured-data fixtures. The insertion
# order of this dict defines the column order exposed by COLUMN_NAMES.
COLUMN_TYPES = {
    "sex": "categorical",
    "age": "numerical",
    "n_siblings_spouses": "categorical",
    "parch": "categorical",
    "fare": "numerical",
    "class": "categorical",
    "deck": "categorical",
    "embark_town": "categorical",
    "alone": "categorical",
}

# Column names, in the same order as the keys of COLUMN_TYPES.
COLUMN_NAMES = list(COLUMN_TYPES)

# Parent directory of the directory that contains this file.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Use pre-split CSVs stored under benchmark/datasets to avoid downloads and
# processing at import time.
TRAIN_CSV_PATH = os.path.join( ROOT_DIR, "benchmark", "datasets", "titanic_train.csv" ) TEST_CSV_PATH = os.path.join( ROOT_DIR, "benchmark", "datasets", "titanic_test.csv" ) def generate_data(num_instances=100, shape=(32, 32, 3)): np.random.seed(SEED) result = np.random.rand(*((num_instances,) + shape)) if result.dtype == np.float64: result = result.astype(np.float32) return result def generate_one_hot_labels(num_instances=100, num_classes=10): np.random.seed(SEED) labels = np.random.randint(num_classes, size=num_instances) result = keras.utils.to_categorical(labels, num_classes=num_classes) return result def generate_text_data(num_instances=100): vocab = np.array( [ ["adorable", "clueless", "dirty", "odd", "stupid"], ["puppy", "car", "rabbit", "girl", "monkey"], ["runs", "hits", "jumps", "drives", "barfs"], [ "crazily.", "dutifully.", "foolishly.", "merrily.", "occasionally.", ], ] ) return np.array( [ " ".join([vocab[j][np.random.randint(0, 5)] for j in range(4)]) for i in range(num_instances) ] ) def build_graph(): keras.backend.clear_session() image_input = ak.ImageInput(shape=(32, 32, 3)) image_input.batch_size = 32 image_input.num_samples = 1000 merged_outputs = ak.SpatialReduction()(image_input) head = ak.ClassificationHead(num_classes=10, shape=(10,)) classification_outputs = head(merged_outputs) return ak.graph.Graph(inputs=image_input, outputs=classification_outputs) def get_func_args(func): params = inspect.signature(func).parameters.keys() return set(params) - set(["self", "args", "kwargs"]) def get_object_detection_data(): images = generate_data(num_instances=2, shape=(32, 32, 3)) bbox_0 = np.random.rand(3, 4) class_id_0 = np.random.rand( 3, ) bbox_1 = np.random.rand(5, 4) class_id_1 = np.random.rand( 5, ) labels = np.array( [(bbox_0, class_id_0), (bbox_1, class_id_1)], dtype=object ) return images, labels def generate_data_with_categorical( num_instances=100, num_numerical=10, num_categorical=3, num_classes=5, ): categorical_data = np.random.randint( 
num_classes, size=(num_instances, num_categorical) ) numerical_data = np.random.rand(num_instances, num_numerical) data = np.concatenate((numerical_data, categorical_data), axis=1) if data.dtype == np.float64: data = data.astype(np.float32) return data ================================================ FILE: autokeras/tuners/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from autokeras.tuners.bayesian_optimization import BayesianOptimization from autokeras.tuners.greedy import Greedy from autokeras.tuners.hyperband import Hyperband from autokeras.tuners.random_search import RandomSearch from autokeras.tuners.task_specific import ImageClassifierTuner ================================================ FILE: autokeras/tuners/bayesian_optimization.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import keras_tuner from autokeras.engine import tuner as tuner_module class BayesianOptimization( keras_tuner.BayesianOptimization, tuner_module.AutoTuner ): """KerasTuner BayesianOptimization with preprocessing layer tuning.""" pass ================================================ FILE: autokeras/tuners/greedy.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import copy from typing import Any from typing import Dict from typing import List from typing import Optional import keras_tuner import numpy as np from autokeras.engine import tuner as tuner_module class TrieNode(object): def __init__(self): super().__init__() self.num_leaves = 0 self.children = {} self.hp_name = None def is_leaf(self): return len(self.children) == 0 class Trie(object): def __init__(self): super().__init__() self.root = TrieNode() def insert(self, hp_name): names = hp_name.split("/") new_word = False current_node = self.root nodes_on_path = [current_node] for name in names: if name not in current_node.children: current_node.children[name] = TrieNode() new_word = True current_node = current_node.children[name] nodes_on_path.append(current_node) current_node.hp_name = hp_name if new_word: for node in nodes_on_path: node.num_leaves += 1 @property def nodes(self): return self._get_all_nodes(self.root) def _get_all_nodes(self, node): ret = [node] for key, value in node.children.items(): ret += self._get_all_nodes(value) 
return ret def get_hp_names(self, node): if node.is_leaf(): return [node.hp_name] ret = [] for key, value in node.children.items(): ret += self.get_hp_names(value) return ret class GreedyOracle(keras_tuner.Oracle): """An oracle combining random search and greedy algorithm. It groups the HyperParameters into several categories, namely, HyperGraph, Preprocessor, Architecture, and Optimization. The oracle tunes each group separately using random search. In each trial, it use a greedy strategy to generate new values for one of the categories of HyperParameters and use the best trial so far for the rest of the HyperParameters values. # Arguments initial_hps: A list of dictionaries in the form of {HyperParameter name (String): HyperParameter value}. Each dictionary is one set of HyperParameters, which are used as the initial trials for the search. Defaults to None. seed: Int. Random seed. """ def __init__(self, initial_hps=None, seed=None, **kwargs): super().__init__(seed=seed, **kwargs) self.initial_hps = copy.deepcopy(initial_hps) or [] self._tried_initial_hps = [False] * len(self.initial_hps) def get_state(self): state = super().get_state() state.update( { "initial_hps": self.initial_hps, "tried_initial_hps": self._tried_initial_hps, } ) return state def set_state(self, state): super().set_state(state) self.initial_hps = state["initial_hps"] self._tried_initial_hps = state["tried_initial_hps"] def _select_hps(self): trie = Trie() best_hps = self._get_best_hps() for hp in best_hps.space: # Not picking the fixed hps for generating new values. 
if best_hps.is_active(hp) and not isinstance( hp, keras_tuner.engine.hyperparameters.Fixed ): trie.insert(hp.name) all_nodes = trie.nodes if len(all_nodes) <= 1: return [] probabilities = np.array([1 / node.num_leaves for node in all_nodes]) sum_p = np.sum(probabilities) probabilities = probabilities / sum_p node = np.random.choice(all_nodes, p=probabilities) return trie.get_hp_names(node) def _next_initial_hps(self): for index, hps in enumerate(self.initial_hps): if not self._tried_initial_hps[index]: self._tried_initial_hps[index] = True return hps def populate_space(self, trial_id): if not all(self._tried_initial_hps): values = self._next_initial_hps() return { "status": keras_tuner.engine.trial.TrialStatus.RUNNING, "values": values, } for _ in range(self._max_collisions): hp_names = self._select_hps() values = self._generate_hp_values(hp_names) # Reached max collisions. if values is None: continue # Values found. return { "status": keras_tuner.engine.trial.TrialStatus.RUNNING, "values": values, } # All stages reached max collisions. return { "status": keras_tuner.engine.trial.TrialStatus.STOPPED, "values": None, } def _get_best_hps(self): best_trials = self.get_best_trials() if best_trials: return best_trials[0].hyperparameters.copy() else: return self.hyperparameters.copy() def _generate_hp_values(self, hp_names): best_hps = self._get_best_hps() collisions = 0 while True: hps = keras_tuner.HyperParameters() # Generate a set of random values. for hp in self.hyperparameters.space: hps.merge([hp]) # if not active, do nothing. # if active, check if selected to be changed. if hps.is_active(hp): # if was active and not selected, do nothing. if best_hps.is_active(hp.name) and hp.name not in hp_names: hps.values[hp.name] = best_hps.values[hp.name] continue # if was not active or selected, sample. 
hps.values[hp.name] = hp.random_sample(self._seed_state) self._seed_state += 1 values = hps.values # Keep trying until the set of values is unique, # or until we exit due to too many collisions. values_hash = self._compute_values_hash(values) if values_hash in self._tried_so_far: collisions += 1 if collisions <= self._max_collisions: continue return None self._tried_so_far.add(values_hash) break return values class Greedy(tuner_module.AutoTuner): def __init__( self, hypermodel: keras_tuner.HyperModel, objective: str = "val_loss", max_trials: int = 10, initial_hps: Optional[List[Dict[str, Any]]] = None, seed: Optional[int] = None, hyperparameters: Optional[keras_tuner.HyperParameters] = None, tune_new_entries: bool = True, allow_new_entries: bool = True, **kwargs ): self.seed = seed oracle = GreedyOracle( objective=objective, max_trials=max_trials, initial_hps=initial_hps, seed=seed, hyperparameters=hyperparameters, tune_new_entries=tune_new_entries, allow_new_entries=allow_new_entries, ) super().__init__(oracle=oracle, hypermodel=hypermodel, **kwargs) ================================================ FILE: autokeras/tuners/greedy_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from unittest import mock import keras import keras_tuner import autokeras as ak from autokeras import test_utils from autokeras.tuners import greedy from autokeras.tuners import task_specific def test_greedy_oracle_get_state_update_space_can_run(): oracle = greedy.GreedyOracle(objective="val_loss") oracle.set_state(oracle.get_state()) hp = keras_tuner.HyperParameters() hp.Boolean("test") oracle.update_space(hp) @mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials") def test_greedy_oracle_populate_different_values(get_best_trials): hp = keras_tuner.HyperParameters() test_utils.build_graph().build(hp) oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED) trial = mock.Mock() trial.hyperparameters = hp get_best_trials.return_value = [trial] oracle.update_space(hp) values_a = oracle.populate_space("a")["values"] values_b = oracle.populate_space("b")["values"] assert not all([values_a[key] == values_b[key] for key in values_a]) @mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials") def test_greedy_oracle_populate_doesnt_crash_with_init_hps(get_best_trials): hp = keras_tuner.HyperParameters() keras.backend.clear_session() input_node = ak.ImageInput(shape=(32, 32, 3)) input_node.batch_size = 32 input_node.num_samples = 1000 output_node = ak.ImageBlock()(input_node) head = ak.ClassificationHead(num_classes=10) head.shape = (10,) output_node = head(output_node) graph = ak.graph.Graph(inputs=input_node, outputs=output_node) graph.build(hp) oracle = greedy.GreedyOracle( initial_hps=task_specific.IMAGE_CLASSIFIER, objective="val_loss", seed=test_utils.SEED, ) trial = mock.Mock() trial.hyperparameters = hp get_best_trials.return_value = [trial] for i in range(10): keras.backend.clear_session() values = oracle.populate_space("a")["values"] hp = oracle.hyperparameters.copy() hp.values = values graph.build(hp) oracle.update_space(hp) @mock.patch("autokeras.tuners.greedy.GreedyOracle._compute_values_hash") 
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials") def test_greedy_oracle_stop_reach_max_collision( get_best_trials, compute_values_hash ): hp = keras_tuner.HyperParameters() test_utils.build_graph().build(hp) oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED) trial = mock.Mock() trial.hyperparameters = hp get_best_trials.return_value = [trial] compute_values_hash.return_value = 1 oracle.update_space(hp) oracle.populate_space("a")["values"] assert ( oracle.populate_space("b")["status"] == keras_tuner.engine.trial.TrialStatus.STOPPED ) @mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials") def test_greedy_oracle_populate_space_with_no_hp(get_best_trials): hp = keras_tuner.HyperParameters() oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED) trial = mock.Mock() trial.hyperparameters = hp get_best_trials.return_value = [trial] oracle.update_space(hp) values_a = oracle.populate_space("a")["values"] assert len(values_a) == 0 ================================================ FILE: autokeras/tuners/hyperband.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import keras_tuner from autokeras.engine import tuner as tuner_module class Hyperband(keras_tuner.Hyperband, tuner_module.AutoTuner): """KerasTuner Hyperband with preprocessing layer tuning.""" def __init__( self, max_epochs: int = 1000, max_trials: int = 100, *args, **kwargs ): super().__init__(max_epochs=max_epochs, *args, **kwargs) self.oracle.max_trials = max_trials ================================================ FILE: autokeras/tuners/hyperband_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: autokeras/tuners/random_search.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import keras_tuner from autokeras.engine import tuner as tuner_module class RandomSearch(keras_tuner.RandomSearch, tuner_module.AutoTuner): """KerasTuner RandomSearch with preprocessing layer tuning.""" pass ================================================ FILE: autokeras/tuners/random_search_test.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: autokeras/tuners/task_specific.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from autokeras.tuners import greedy

# Each table below is a list of dicts mapping hyperparameter names to values.
# The tuner classes at the bottom of this module hand the matching table to
# `greedy.Greedy` through its `initial_hps` argument.
# NOTE(review): the key names presumably have to match the hyperparameter
# names registered when the corresponding task model is built -- confirm
# against the task-specific tests before renaming any key.

# Initial configurations for image classification: one dict per
# `image_block_1/block_type` ("vanilla", "resnet", "efficient").
IMAGE_CLASSIFIER = [
    {
        "image_block_1/block_type": "vanilla",
        "image_block_1/normalize": True,
        "image_block_1/augment": False,
        "image_block_1/conv_block_1/kernel_size": 3,
        "image_block_1/conv_block_1/num_blocks": 1,
        "image_block_1/conv_block_1/num_layers": 2,
        "image_block_1/conv_block_1/max_pooling": True,
        "image_block_1/conv_block_1/separable": False,
        "image_block_1/conv_block_1/dropout": 0.25,
        "image_block_1/conv_block_1/filters_0_0": 32,
        "image_block_1/conv_block_1/filters_0_1": 64,
        "classification_head_1/spatial_reduction_1/reduction_type": "flatten",
        "classification_head_1/dropout": 0.5,
        "optimizer": "adam",
        "learning_rate": 1e-3,
    },
    {
        "image_block_1/block_type": "resnet",
        "image_block_1/normalize": True,
        "image_block_1/augment": True,
        "image_block_1/image_augmentation_1/horizontal_flip": True,
        "image_block_1/image_augmentation_1/vertical_flip": True,
        "image_block_1/image_augmentation_1/contrast_factor": 0.0,
        "image_block_1/image_augmentation_1/rotation_factor": 0.0,
        "image_block_1/image_augmentation_1/translation_factor": 0.1,
        "image_block_1/image_augmentation_1/zoom_factor": 0.0,
        "image_block_1/res_net_block_1/pretrained": False,
        "image_block_1/res_net_block_1/version": "resnet50",
        "image_block_1/res_net_block_1/imagenet_size": True,
        "classification_head_1/spatial_reduction_1/reduction_type": "global_avg",  # noqa: E501
        "classification_head_1/dropout": 0,
        "optimizer": "adam",
        "learning_rate": 1e-3,
    },
    {
        "image_block_1/block_type": "efficient",
        "image_block_1/normalize": True,
        "image_block_1/augment": True,
        "image_block_1/image_augmentation_1/horizontal_flip": True,
        "image_block_1/image_augmentation_1/vertical_flip": False,
        "image_block_1/image_augmentation_1/contrast_factor": 0.0,
        "image_block_1/image_augmentation_1/rotation_factor": 0.0,
        "image_block_1/image_augmentation_1/translation_factor": 0.1,
        "image_block_1/image_augmentation_1/zoom_factor": 0.0,
        "image_block_1/efficient_net_block_1/pretrained": True,
        "image_block_1/efficient_net_block_1/version": "b7",
        "image_block_1/efficient_net_block_1/trainable": True,
        "image_block_1/efficient_net_block_1/imagenet_size": True,
        "classification_head_1/spatial_reduction_1/reduction_type": "global_avg",  # noqa: E501
        "classification_head_1/dropout": 0,
        "optimizer": "adam",
        "learning_rate": 2e-5,
    },
]

# Single initial configuration for text classification.
TEXT_CLASSIFIER = [
    {
        "text_block_1/max_tokens": 500,
        "text_block_1/embedding_1/embedding_dim": 32,
        "text_block_1/embedding_1/dropout": 0.25,
        "text_block_1/conv_block_1/kernel_size": 3,
        "text_block_1/conv_block_1/separable": False,
        "text_block_1/conv_block_1/max_pooling": True,
        "text_block_1/conv_block_1/num_blocks": 2,
        "text_block_1/conv_block_1/num_layers": 2,
        "text_block_1/conv_block_1/filters_0_0": 32,
        "text_block_1/conv_block_1/filters_0_1": 32,
        "text_block_1/conv_block_1/dropout": 0.0,
        "text_block_1/conv_block_1/filters_1_0": 32,
        "text_block_1/conv_block_1/filters_1_1": 32,
        "text_block_1/spatial_reduction_1/reduction_type": "flatten",
        "text_block_1/dense_block_1/use_batchnorm": False,
        "text_block_1/dense_block_1/num_layers": 2,
        "text_block_1/dense_block_1/units_0": 32,
        "text_block_1/dense_block_1/dropout": 0.0,
        "text_block_1/dense_block_1/units_1": 32,
        "classification_head_1/dropout": 0,
        "optimizer": "adam_weight_decay",
        "learning_rate": 2e-5,
    },
]

# Single initial configuration for structured-data classification.
STRUCTURED_DATA_CLASSIFIER = [
    {
        "structured_data_block_1/normalize": True,
        "structured_data_block_1/dense_block_1/num_layers": 2,
        "structured_data_block_1/dense_block_1/use_batchnorm": False,
        "structured_data_block_1/dense_block_1/dropout": 0,
        "structured_data_block_1/dense_block_1/units_0": 32,
        "structured_data_block_1/dense_block_1/units_1": 32,
        "classification_head_1/dropout": 0.0,
        "optimizer": "adam",
        "learning_rate": 0.001,
    }
]

# Single initial configuration for structured-data regression; it differs
# from the classifier table only in the head key (`regression_head_1`).
STRUCTURED_DATA_REGRESSOR = [
    {
        "structured_data_block_1/normalize": True,
        "structured_data_block_1/dense_block_1/num_layers": 2,
        "structured_data_block_1/dense_block_1/use_batchnorm": False,
        "structured_data_block_1/dense_block_1/dropout": 0,
        "structured_data_block_1/dense_block_1/units_0": 32,
        "structured_data_block_1/dense_block_1/units_1": 32,
        "regression_head_1/dropout": 0.0,
        "optimizer": "adam",
        "learning_rate": 0.001,
    }
]


class ImageClassifierTuner(greedy.Greedy):
    """Greedy tuner constructed with `IMAGE_CLASSIFIER` as `initial_hps`."""

    def __init__(self, **kwargs):
        # All other keyword arguments are forwarded to greedy.Greedy.
        super().__init__(initial_hps=IMAGE_CLASSIFIER, **kwargs)


class TextClassifierTuner(greedy.Greedy):
    """Greedy tuner constructed with `TEXT_CLASSIFIER` as `initial_hps`."""

    def __init__(self, **kwargs):
        # All other keyword arguments are forwarded to greedy.Greedy.
        super().__init__(initial_hps=TEXT_CLASSIFIER, **kwargs)


class StructuredDataClassifierTuner(greedy.Greedy):
    """Greedy tuner constructed with `STRUCTURED_DATA_CLASSIFIER`."""

    def __init__(self, **kwargs):
        # All other keyword arguments are forwarded to greedy.Greedy.
        super().__init__(initial_hps=STRUCTURED_DATA_CLASSIFIER, **kwargs)


class StructuredDataRegressorTuner(greedy.Greedy):
    """Greedy tuner constructed with `STRUCTURED_DATA_REGRESSOR`."""

    def __init__(self, **kwargs):
        # All other keyword arguments are forwarded to greedy.Greedy.
        super().__init__(initial_hps=STRUCTURED_DATA_REGRESSOR, **kwargs)

================================================
FILE: autokeras/tuners/task_specific_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy import keras_tuner import autokeras as ak from autokeras.tuners import task_specific def test_img_clf_init_hp0_equals_hp_of_a_model(tmp_path): clf = ak.ImageClassifier(directory=tmp_path) clf.inputs[0].shape = (32, 32, 3) clf.outputs[0].in_blocks[0].shape = (10,) init_hp = task_specific.IMAGE_CLASSIFIER[0] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) def test_img_clf_init_hp1_equals_hp_of_a_model(tmp_path): clf = ak.ImageClassifier(directory=tmp_path) clf.inputs[0].shape = (32, 32, 3) clf.outputs[0].in_blocks[0].shape = (10,) init_hp = task_specific.IMAGE_CLASSIFIER[1] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) def test_img_clf_init_hp2_equals_hp_of_a_model(tmp_path): clf = ak.ImageClassifier(directory=tmp_path) clf.inputs[0].shape = (32, 32, 3) clf.outputs[0].in_blocks[0].shape = (10,) init_hp = task_specific.IMAGE_CLASSIFIER[2] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) def test_txt_clf_init_hp0_equals_hp_of_a_model(tmp_path): clf = ak.TextClassifier(directory=tmp_path) clf.inputs[0].shape = (1,) clf.inputs[0].batch_size = 6 clf.inputs[0].num_samples = 1000 clf.outputs[0].in_blocks[0].shape = (10,) clf.tuner.hypermodel.epochs = 1000 clf.tuner.hypermodel.num_samples = 20000 init_hp = task_specific.TEXT_CLASSIFIER[0] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) def test_sd_clf_init_hp0_equals_hp_of_a_model(tmp_path): clf = ak.StructuredDataClassifier( directory=tmp_path, column_names=["a", "b"], column_types={"a": "numerical", "b": "numerical"}, ) clf.inputs[0].shape = (2,) clf.outputs[0].in_blocks[0].shape = (10,) init_hp = 
task_specific.STRUCTURED_DATA_CLASSIFIER[0] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) def test_sd_reg_init_hp0_equals_hp_of_a_model(tmp_path): clf = ak.StructuredDataRegressor( directory=tmp_path, column_names=["a", "b"], column_types={"a": "numerical", "b": "numerical"}, ) clf.inputs[0].shape = (2,) clf.outputs[0].in_blocks[0].shape = (10,) init_hp = task_specific.STRUCTURED_DATA_REGRESSOR[0] hp = keras_tuner.HyperParameters() hp.values = copy.copy(init_hp) clf.tuner.hypermodel.build(hp) assert set(init_hp.keys()) == set(hp._hps.keys()) ================================================ FILE: autokeras/utils/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: autokeras/utils/data_utils.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. import keras import numpy as np import tree from keras import ops def split_dataset(dataset, validation_split): """Split nested numpy arrays into training and validation. # Arguments dataset: A nested structure of numpy arrays. validation_split: Float. The split ratio for the validation set. # Raises ValueError: If the dataset provided is too small to be split. # Returns A tuple of two nested structures. The training set and the validation set. """ # Get the number of instances from the first array's shape first_shape = tree.flatten( tree.map_structure(lambda x: np.array(x.shape), dataset) )[0] num_instances = first_shape[0] if num_instances < 2: raise ValueError( "The dataset should at least contain 2 instances to be split." ) validation_set_size = min( max(int(num_instances * validation_split), 1), num_instances - 1 ) train_set_size = num_instances - validation_set_size # Split each array in the nested structure train_dataset = tree.map_structure(lambda x: x[:train_set_size], dataset) validation_dataset = tree.map_structure( lambda x: x[train_set_size:], dataset ) return train_dataset, validation_dataset def cast_to_float32(tensor): if keras.backend.standardize_dtype(tensor.dtype) == "float32": return tensor return ops.cast(tensor, "float32") # pragma: no cover def dataset_shape(dataset): """Recursively get shapes from a nested structure of numpy arrays. # Arguments nested_arrays: A nested structure (dict, list, tuple) containing numpy arrays. # Returns The same nested structure with shapes instead of arrays. """ return tree.map_structure(lambda x: np.array(x.shape), dataset) ================================================ FILE: autokeras/utils/data_utils_test.py ================================================ # Copyright 2020 The AutoKeras Authors. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import pytest from autokeras.utils import data_utils def test_split_data_has_one_batch_error(): with pytest.raises(ValueError) as info: data_utils.split_dataset(np.array([1]), 0.2) assert "The dataset should at least contain 2" in str(info.value) ================================================ FILE: autokeras/utils/io_utils.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import json from keras_tuner.engine import hyperparameters WHITELIST_FORMATS = (".bmp", ".gif", ".jpeg", ".jpg", ".png") def save_json(path, obj): with open(path, "w") as f: json.dump(obj, f) def load_json(path): with open(path, "r") as f: return json.load(f) def deserialize_block_arg(arg): if isinstance(arg, dict): return hyperparameters.deserialize(arg) return arg def serialize_block_arg(arg): if isinstance(arg, hyperparameters.HyperParameter): return hyperparameters.serialize(arg) return arg ================================================ FILE: autokeras/utils/layer_utils.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from keras import layers def get_global_average_pooling(shape): return [ layers.GlobalAveragePooling1D, layers.GlobalAveragePooling2D, layers.GlobalAveragePooling3D, ][len(shape) - 3] def get_global_max_pooling(shape): return [ layers.GlobalMaxPool1D, layers.GlobalMaxPool2D, layers.GlobalMaxPool3D, ][len(shape) - 3] def get_max_pooling(shape): return [ layers.MaxPool1D, layers.MaxPool2D, layers.MaxPool3D, ][len(shape) - 3] def get_conv(shape): return [layers.Conv1D, layers.Conv2D, layers.Conv3D][len(shape) - 3] def get_sep_conv(shape): return [ # pragma: no cover layers.SeparableConv1D, layers.SeparableConv2D, layers.Conv3D, ][len(shape) - 3] ================================================ FILE: autokeras/utils/types.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from typing import Callable from typing import Dict from typing import List from typing import Union import numpy as np from keras.losses import Loss from keras.metrics import Metric DatasetType = Union[np.ndarray] LossType = Union[str, Callable, Loss] AcceptableMetric = Union[str, Callable, Metric] MetricsType = Union[ List[AcceptableMetric], List[List[AcceptableMetric]], Dict[str, AcceptableMetric], ] ================================================ FILE: autokeras/utils/utils.py ================================================ # Copyright 2020 The AutoKeras Authors. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import re import keras import keras_tuner import tree # Collect OOM exceptions from available libraries oom_exceptions = [] try: import tensorflow as tf oom_exceptions.append(tf.errors.ResourceExhaustedError) # pragma: no cover except ImportError: # pragma: no cover pass # pragma: no cover try: import torch oom_exceptions.append(torch.cuda.OutOfMemoryError) # pragma: no cover except ImportError: # pragma: no cover pass # pragma: no cover try: import jax oom_exceptions.append(jax.errors.ResourceExhaustedError) # pragma: no cover except (ImportError, AttributeError): # pragma: no cover pass # pragma: no cover oom_exceptions = tuple(oom_exceptions) def validate_num_inputs(inputs, num): inputs = tree.flatten(inputs) if not len(inputs) == num: raise ValueError( "Expected {num} elements in the inputs list " "but received {len} inputs.".format(num=num, len=len(inputs)) ) def to_snake_case(name): intermediate = re.sub("(.)([A-Z][a-z0-9]+)", r"\1_\2", name) insecure = re.sub("([a-z])([A-Z])", r"\1_\2", intermediate).lower() return insecure def contain_instance(instance_list, instance_type): return any( [isinstance(instance, instance_type) for instance in instance_list] ) def evaluate_with_adaptive_batch_size( model, batch_size, verbose=1, **fit_kwargs ): return run_with_adaptive_batch_size( batch_size, lambda x, validation_data, **kwargs: model.evaluate( x, verbose=verbose, **kwargs ), **fit_kwargs, ) def predict_with_adaptive_batch_size( 
model, batch_size, verbose=1, **fit_kwargs ): return run_with_adaptive_batch_size( batch_size, lambda x, validation_data, **kwargs: model.predict( x, verbose=verbose, **kwargs ), **fit_kwargs, ) def fit_with_adaptive_batch_size(model, batch_size, **fit_kwargs): history = run_with_adaptive_batch_size( batch_size, lambda **kwargs: model.fit(**kwargs), **fit_kwargs ) return model, history def run_with_adaptive_batch_size(batch_size, func, **fit_kwargs): validation_data = None if "validation_data" in fit_kwargs: validation_data = fit_kwargs.pop("validation_data") while batch_size > 0: try: history = func( validation_data=validation_data, batch_size=batch_size, **fit_kwargs, ) break except oom_exceptions as e: # pragma: no cover if batch_size == 1: # pragma: no cover raise e # pragma: no cover batch_size //= 2 # pragma: no cover print( # pragma: no cover "Not enough memory, reduce batch size to {batch_size}.".format( batch_size=batch_size ) ) return history def get_hyperparameter(value, hp, dtype): if value is None: return hp return value def add_to_hp(hp, hps, name=None): """Add the HyperParameter (self) to the HyperParameters. # Arguments hp: keras_tuner.HyperParameters. name: String. If left unspecified, the hp name is used. """ if not isinstance(hp, keras_tuner.engine.hyperparameters.HyperParameter): return hp kwargs = hp.get_config() if name is None: name = hp.name kwargs.pop("conditions") kwargs.pop("name") class_name = hp.__class__.__name__ func = getattr(hps, class_name) return func(name=name, **kwargs) def serialize_keras_object(obj): return keras.utils.serialize_keras_object(obj) # pragma: no cover def deserialize_keras_object(config, module_objects=None, custom_objects=None): return keras.utils.deserialize_keras_object( config, custom_objects, module_objects ) ================================================ FILE: autokeras/utils/utils_test.py ================================================ # Copyright 2020 The AutoKeras Authors. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import pytest from keras_tuner.engine import hyperparameters from autokeras.utils import utils def test_validate_num_inputs_error(): with pytest.raises(ValueError) as info: utils.validate_num_inputs([1, 2, 3], 2) assert "Expected 2 elements in the inputs list" in str(info.value) def test_get_hyperparameter_with_none_return_hp(): hp = utils.get_hyperparameter( None, hyperparameters.Choice("hp", [10, 20]), int ) assert isinstance(hp, hyperparameters.Choice) def test_get_hyperparameter_with_int_return_int(): value = utils.get_hyperparameter( 10, hyperparameters.Choice("hp", [10, 20]), int ) assert isinstance(value, int) assert value == 10 def test_get_hyperparameter_with_hp_return_same(): hp = utils.get_hyperparameter( hyperparameters.Choice("hp", [10, 30]), hyperparameters.Choice("hp", [10, 20]), int, ) assert isinstance(hp, hyperparameters.Choice) ================================================ FILE: benchmark/README.md ================================================ Usage: python benchmark/run.py structured_data_classification report.csv ================================================ FILE: benchmark/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: benchmark/datasets/titanic_test.csv ================================================ sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived female,15,0,0,7.225,3,?,C,True,1 female,1,0,2,15.7417,3,?,C,False,1 male,20,1,1,15.7417,3,?,C,False,1 female,19,1,1,15.7417,3,?,C,False,1 male,33,0,0,8.05,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,0 female,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.8958,3,?,S,True,0 male,12,1,0,11.2417,3,?,C,False,1 female,14,1,0,11.2417,3,?,C,False,1 female,29,0,0,7.925,3,?,S,True,0 male,28,0,0,8.05,3,?,S,True,0 female,18,0,0,7.775,3,?,S,True,1 female,26,0,0,7.8542,3,?,S,True,1 male,21,0,0,7.8542,3,?,S,True,0 male,41,0,0,7.125,3,?,S,True,0 male,39,0,0,7.925,3,?,S,True,1 male,21,0,0,7.8,3,?,S,True,0 male,28.5,0,0,7.2292,3,?,C,True,0 female,22,0,0,7.75,3,?,S,True,1 male,61,0,0,6.2375,3,?,S,True,0 male,,1,0,15.5,3,?,Q,False,0 male,,0,0,7.8292,3,?,Q,True,0 female,,1,0,15.5,3,?,Q,False,1 male,,0,0,7.7333,3,?,Q,True,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.75,3,?,Q,True,0 male,23,0,0,9.225,3,?,S,True,0 female,,0,0,7.75,3,?,Q,True,0 female,,0,0,7.75,3,?,Q,True,1 female,,0,0,7.8792,3,?,Q,True,1 female,22,0,0,7.775,3,?,S,True,1 male,,0,0,7.75,3,?,Q,True,1 female,,0,0,7.8292,3,?,Q,True,1 male,9,0,1,3.1708,3,?,S,False,1 male,28,0,0,22.525,3,?,S,True,0 male,42,0,1,8.4042,3,?,S,False,0 male,,0,0,7.3125,3,?,S,True,0 female,31,0,0,7.8542,3,?,S,True,0 male,28,0,0,7.8542,3,?,S,True,0 male,32,0,0,7.775,3,?,S,True,1 male,20,0,0,9.225,3,?,S,True,0 
female,23,0,0,8.6625,3,?,S,True,0 female,20,0,0,8.6625,3,?,S,True,0 male,20,0,0,8.6625,3,?,S,True,0 male,16,0,0,9.2167,3,?,S,True,0 female,31,0,0,8.6833,3,?,S,True,1 female,,0,0,7.6292,3,?,Q,True,0 male,2,3,1,21.075,3,?,S,False,0 male,6,3,1,21.075,3,?,S,False,0 female,3,3,1,21.075,3,?,S,False,0 female,8,3,1,21.075,3,?,S,False,0 female,29,0,4,21.075,3,?,S,False,0 male,1,4,1,39.6875,3,?,S,False,0 male,7,4,1,39.6875,3,?,S,False,0 male,2,4,1,39.6875,3,?,S,False,0 male,16,4,1,39.6875,3,?,S,False,0 male,14,4,1,39.6875,3,?,S,False,0 female,41,0,5,39.6875,3,?,S,False,0 male,21,0,0,8.6625,3,?,S,True,0 male,19,0,0,14.5,3,?,S,True,0 male,,0,0,8.7125,3,?,C,True,0 male,32,0,0,7.8958,3,?,S,True,0 male,0.75,1,1,13.775,3,?,S,False,0 female,3,1,1,13.775,3,?,S,False,0 female,26,0,2,13.775,3,?,S,False,0 male,,0,0,7,3,?,S,True,0 male,,0,0,7.775,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,21,0,0,7.925,3,?,S,True,0 male,25,0,0,7.925,3,?,S,True,0 male,22,0,0,7.25,3,?,S,True,0 male,25,1,0,7.775,3,?,S,False,1 male,,1,1,22.3583,3,?,C,False,1 female,,1,1,22.3583,3,F E69,C,False,1 female,,0,2,22.3583,3,?,C,False,1 female,,0,0,8.1375,3,?,Q,True,0 male,24,0,0,8.05,3,?,S,True,0 female,28,0,0,7.8958,3,?,S,True,0 male,19,0,0,7.8958,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,25,1,0,7.775,3,?,S,False,0 female,18,0,0,7.775,3,?,S,True,0 male,32,0,0,8.05,3,E10,S,True,1 male,,0,0,7.8958,3,?,S,True,0 male,17,0,0,8.6625,3,?,S,True,0 male,24,0,0,8.6625,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 female,,0,0,8.1125,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,0 male,,0,0,7.25,3,?,S,True,0 male,38,0,0,7.8958,3,?,S,True,0 male,21,0,0,8.05,3,?,S,True,0 male,10,4,1,29.125,3,?,Q,False,0 male,4,4,1,29.125,3,?,Q,False,0 male,7,4,1,29.125,3,?,Q,False,0 male,2,4,1,29.125,3,?,Q,False,0 male,8,4,1,29.125,3,?,Q,False,0 female,39,0,5,29.125,3,?,Q,False,0 female,22,0,0,39.6875,3,?,S,True,0 male,35,0,0,7.125,3,?,S,True,0 female,,0,0,7.7208,3,?,Q,True,1 male,,0,0,14.5,3,?,S,True,0 female,,0,0,14.5,3,?,S,True,0 
male,50,1,0,14.5,3,?,S,False,0 female,47,1,0,14.5,3,?,S,False,0 male,,0,0,8.05,3,?,S,True,0 male,,0,0,7.775,3,?,S,True,0 female,2,1,1,20.2125,3,?,S,False,0 male,18,1,1,20.2125,3,?,S,False,0 female,41,0,2,20.2125,3,?,S,False,0 female,,0,0,8.05,3,?,S,True,1 male,50,0,0,8.05,3,?,S,True,0 male,16,0,0,8.05,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,1 male,,0,0,24.15,3,?,Q,True,0 male,,0,0,7.2292,3,?,C,True,0 male,25,0,0,7.225,3,?,C,True,0 male,,0,0,7.225,3,?,C,True,0 male,,0,0,7.7292,3,?,Q,True,0 male,,0,0,7.575,3,?,S,True,0 male,38.5,0,0,7.25,3,?,S,True,0 male,,8,2,69.55,3,?,S,False,0 male,14.5,8,2,69.55,3,?,S,False,0 female,,8,2,69.55,3,?,S,False,0 female,,8,2,69.55,3,?,S,False,0 female,,8,2,69.55,3,?,S,False,0 female,,8,2,69.55,3,?,S,False,0 male,,8,2,69.55,3,?,S,False,0 male,,8,2,69.55,3,?,S,False,0 male,,8,2,69.55,3,?,S,False,0 male,,1,9,69.55,3,?,S,False,0 female,,1,9,69.55,3,?,S,False,0 male,24,0,0,9.325,3,?,S,True,0 female,21,0,0,7.65,3,?,S,True,1 male,39,0,0,7.925,3,?,S,True,0 male,,2,0,21.6792,3,?,C,False,0 male,,2,0,21.6792,3,?,C,False,0 male,,2,0,21.6792,3,?,C,False,0 female,1,1,1,16.7,3,G6,S,False,1 female,24,0,2,16.7,3,G6,S,False,1 female,4,1,1,16.7,3,G6,S,False,1 male,25,0,0,9.5,3,?,S,True,1 male,20,0,0,8.05,3,?,S,True,0 male,24.5,0,0,8.05,3,?,S,True,0 male,,0,0,7.725,3,?,Q,True,0 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,0 male,29,0,0,9.5,3,?,S,True,1 male,,0,0,15.1,3,?,S,True,0 female,,0,0,7.7792,3,?,Q,True,1 male,,0,0,8.05,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,22,0,0,7.2292,3,?,C,True,0 male,,0,0,8.05,3,?,S,True,0 male,40,0,0,7.8958,3,?,S,True,0 male,21,0,0,7.925,3,?,S,True,0 female,18,0,0,7.4958,3,?,S,True,1 male,4,3,2,27.9,3,?,S,False,0 male,10,3,2,27.9,3,?,S,False,0 female,9,3,2,27.9,3,?,S,False,0 female,2,3,2,27.9,3,?,S,False,0 male,40,1,4,27.9,3,?,S,False,0 female,45,1,4,27.9,3,?,S,False,0 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,,0,0,8.6625,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,0 
female,,0,0,7.7333,3,?,Q,True,1 male,19,0,0,7.65,3,F G73,S,True,0 male,30,0,0,8.05,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,32,0,0,8.05,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,33,0,0,8.6625,3,?,C,True,0 female,23,0,0,7.55,3,?,S,True,1 male,21,0,0,8.05,3,?,S,True,0 male,60.5,0,0,?,3,?,S,True,0 male,19,0,0,7.8958,3,?,S,True,0 female,22,0,0,9.8375,3,?,S,True,0 male,31,0,0,7.925,3,?,S,True,1 male,27,0,0,8.6625,3,?,S,True,0 female,2,0,1,10.4625,3,G6,S,False,0 female,29,1,1,10.4625,3,G6,S,False,0 male,16,0,0,8.05,3,?,S,True,1 male,44,0,0,7.925,3,?,S,True,1 male,25,0,0,7.05,3,?,S,True,0 male,74,0,0,7.775,3,?,S,True,0 male,14,0,0,9.225,3,?,S,True,1 male,24,0,0,7.7958,3,?,S,True,0 male,25,0,0,7.7958,3,?,S,True,1 male,34,0,0,8.05,3,?,S,True,0 male,0.4167,0,1,8.5167,3,?,C,False,1 male,,1,0,6.4375,3,?,C,False,0 male,,0,0,6.4375,3,?,C,True,0 male,,0,0,7.225,3,?,C,True,0 female,16,1,1,8.5167,3,?,C,False,1 male,,0,0,8.05,3,?,S,True,0 male,,1,0,16.1,3,?,S,False,0 female,,1,0,16.1,3,?,S,False,1 male,32,0,0,7.925,3,?,S,True,0 male,,0,0,7.75,3,F38,Q,True,0 male,,0,0,7.8958,3,?,S,True,0 male,30.5,0,0,8.05,3,?,S,True,0 male,44,0,0,8.05,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,0 male,25,0,0,0,3,?,S,True,1 male,,0,0,7.2292,3,?,C,True,0 male,7,1,1,15.2458,3,?,C,False,1 female,9,1,1,15.2458,3,?,C,False,1 female,29,0,2,15.2458,3,?,C,False,1 male,36,0,0,7.8958,3,?,S,True,0 female,18,0,0,9.8417,3,?,S,True,1 female,63,0,0,9.5875,3,?,S,True,1 male,,1,1,14.5,3,?,S,False,0 male,11.5,1,1,14.5,3,?,S,False,0 male,40.5,0,2,14.5,3,?,S,False,0 female,10,0,2,24.15,3,?,S,False,0 male,36,1,1,24.15,3,?,S,False,0 female,30,1,1,24.15,3,?,S,False,0 male,,0,0,9.5,3,?,S,True,0 male,33,0,0,9.5,3,?,S,True,0 male,28,0,0,9.5,3,?,S,True,0 male,28,0,0,9.5,3,?,S,True,0 male,47,0,0,9,3,?,S,True,0 female,18,2,0,18,3,?,S,False,0 male,31,3,0,18,3,?,S,False,0 male,16,2,0,18,3,?,S,False,0 female,31,1,0,18,3,?,S,False,0 male,22,0,0,7.225,3,?,C,True,1 male,20,0,0,7.8542,3,?,S,True,0 
female,14,0,0,7.8542,3,?,S,True,0 male,22,0,0,7.8958,3,?,S,True,0 male,22,0,0,9,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,,0,0,7.55,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,32.5,0,0,9.5,3,?,S,True,0 female,38,0,0,7.2292,3,?,C,True,1 male,51,0,0,7.75,3,?,S,True,0 male,18,1,0,6.4958,3,?,S,False,0 male,21,1,0,6.4958,3,?,S,False,0 female,47,1,0,7,3,?,S,False,1 male,,0,0,8.7125,3,?,S,True,0 male,,0,0,7.55,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,28.5,0,0,16.1,3,?,S,True,0 male,21,0,0,7.25,3,?,S,True,0 male,27,0,0,8.6625,3,?,S,True,0 male,,0,0,7.25,3,?,S,True,0 male,36,0,0,9.5,3,?,S,True,0 male,27,1,0,14.4542,3,?,C,False,0 female,15,1,0,14.4542,3,?,C,False,1 male,45.5,0,0,7.225,3,?,C,True,0 male,,0,0,7.225,3,?,C,True,0 male,,0,0,14.4583,3,?,C,True,0 female,14.5,1,0,14.4542,3,?,C,False,0 female,,1,0,14.4542,3,?,C,False,0 male,26.5,0,0,7.225,3,?,C,True,0 male,27,0,0,7.225,3,?,C,True,0 male,29,0,0,7.875,3,?,S,True,0 ================================================ FILE: benchmark/datasets/titanic_train.csv ================================================ sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived female,29,0,0,211.3375,1,B5,S,True,1 male,0.9167,1,2,151.55,1,C22 C26,S,False,1 female,2,1,2,151.55,1,C22 C26,S,False,0 male,30,1,2,151.55,1,C22 C26,S,False,0 female,25,1,2,151.55,1,C22 C26,S,False,0 male,48,0,0,26.55,1,E12,S,True,1 female,63,1,0,77.9583,1,D7,S,False,1 male,39,0,0,0.0,1,A36,S,True,0 female,53,2,0,51.4792,1,C101,S,False,1 male,71,0,0,49.5042,1,?,C,True,0 male,47,1,0,227.525,1,C62 C64,C,False,0 female,18,1,0,227.525,1,C62 C64,C,False,1 female,24,0,0,69.3,1,B35,C,True,1 female,26,0,0,78.85,1,?,S,True,1 male,80,0,0,30.0,1,A23,S,True,1 male,,0,0,25.925,1,?,S,True,0 male,24,0,1,247.5208,1,B58 B60,C,False,0 female,50,0,1,247.5208,1,B58 B60,C,False,1 female,32,0,0,76.2917,1,D15,C,True,1 male,36,0,0,75.2417,1,C6,C,True,0 male,37,1,1,52.5542,1,D35,S,False,1 female,47,1,1,52.5542,1,D35,S,False,1 male,26,0,0,30.0,1,C148,C,True,1 
female,42,0,0,227.525,1,?,C,True,1 female,29,0,0,221.7792,1,C97,S,True,1 male,25,0,0,26.0,1,?,C,True,0 male,25,1,0,91.0792,1,B49,C,False,1 female,19,1,0,91.0792,1,B49,C,False,1 female,35,0,0,135.6333,1,C99,S,True,1 male,28,0,0,26.55,1,C52,S,True,1 male,45,0,0,35.5,1,T,S,True,0 male,40,0,0,31.0,1,A31,C,True,1 female,30,0,0,164.8667,1,C7,S,True,1 female,58,0,0,26.55,1,C103,S,True,1 male,42,0,0,26.55,1,D22,S,True,0 female,45,0,0,262.375,1,?,C,True,1 female,22,0,1,55.0,1,E33,S,False,1 male,,0,0,26.55,1,?,S,True,1 male,41,0,0,30.5,1,A21,S,True,0 male,48,0,0,50.4958,1,B10,C,True,0 male,,0,0,39.6,1,?,C,True,0 female,44,0,0,27.7208,1,B4,C,True,1 female,59,2,0,51.4792,1,C101,S,False,1 female,60,0,0,76.2917,1,D15,C,True,1 female,41,0,0,134.5,1,E40,C,True,1 male,45,0,0,26.55,1,B38,S,True,0 male,,0,0,31.0,1,?,S,True,0 male,42,0,0,26.2875,1,E24,S,True,1 female,53,0,0,27.4458,1,?,C,True,1 male,36,0,1,512.3292,1,B51 B53 B55,C,False,1 female,58,0,1,512.3292,1,B51 B53 B55,C,False,1 male,33,0,0,5.0,1,B51 B53 B55,S,True,0 male,28,0,0,47.1,1,?,S,True,0 male,17,0,0,47.1,1,?,S,True,0 male,11,1,2,120.0,1,B96 B98,S,False,1 female,14,1,2,120.0,1,B96 B98,S,False,1 male,36,1,2,120.0,1,B96 B98,S,False,1 female,36,1,2,120.0,1,B96 B98,S,False,1 male,49,0,0,26.0,1,?,S,True,0 female,,0,0,27.7208,1,?,C,True,1 male,36,1,0,78.85,1,C46,S,False,0 female,76,1,0,78.85,1,C46,S,False,1 male,46,1,0,61.175,1,E31,S,False,0 female,47,1,0,61.175,1,E31,S,False,1 male,27,1,0,53.1,1,E8,S,False,1 female,33,1,0,53.1,1,E8,S,False,1 female,36,0,0,262.375,1,B61,C,True,1 female,30,0,0,86.5,1,B77,S,True,1 male,45,0,0,29.7,1,A9,C,True,1 female,,0,1,55.0,1,E33,S,False,1 male,,0,0,0.0,1,?,S,True,0 male,27,1,0,136.7792,1,C89,C,False,0 female,26,1,0,136.7792,1,C89,C,False,1 female,22,0,0,151.55,1,?,S,True,1 male,,0,0,52.0,1,A14,S,True,0 male,47,0,0,25.5875,1,E58,S,True,0 female,39,1,1,83.1583,1,E49,C,False,1 male,37,1,1,83.1583,1,E52,C,False,0 female,64,0,2,83.1583,1,E45,C,False,1 female,55,2,0,25.7,1,C101,S,False,1 
male,,0,0,26.55,1,?,S,True,0 male,70,1,1,71.0,1,B22,S,False,0 female,36,0,2,71.0,1,B22,S,False,1 female,64,1,1,26.55,1,B26,S,False,1 male,39,1,0,71.2833,1,C85,C,False,0 female,38,1,0,71.2833,1,C85,C,False,1 male,51,0,0,26.55,1,E17,S,True,1 male,27,0,0,30.5,1,?,S,True,1 female,33,0,0,151.55,1,?,S,True,1 male,31,1,0,52.0,1,B71,S,False,0 female,27,1,2,52.0,1,B71,S,False,1 male,31,1,0,57.0,1,B20,S,False,1 female,17,1,0,57.0,1,B20,S,False,1 male,53,1,1,81.8583,1,A34,S,False,1 male,4,0,2,81.8583,1,A34,S,False,1 female,54,1,1,81.8583,1,A34,S,False,1 male,50,1,0,106.425,1,C86,C,False,0 female,27,1,1,247.5208,1,B58 B60,C,False,1 female,48,1,0,106.425,1,C86,C,False,1 female,48,1,0,39.6,1,A16,C,False,1 male,49,1,0,56.9292,1,A20,C,False,1 male,39,0,0,29.7,1,A18,C,True,0 female,23,0,1,83.1583,1,C54,C,False,1 female,38,0,0,227.525,1,C45,C,True,1 female,54,1,0,78.2667,1,D20,C,False,1 female,36,0,0,31.6792,1,A29,C,True,0 male,,0,0,221.7792,1,C95,S,True,0 female,,0,0,31.6833,1,?,S,True,1 female,,0,0,110.8833,1,?,C,True,1 male,36,0,0,26.3875,1,E25,S,True,1 male,30,0,0,27.75,1,C111,C,True,0 female,24,3,2,263.0,1,C23 C25 C27,S,False,1 female,28,3,2,263.0,1,C23 C25 C27,S,False,1 female,23,3,2,263.0,1,C23 C25 C27,S,False,1 male,19,3,2,263.0,1,C23 C25 C27,S,False,0 male,64,1,4,263.0,1,C23 C25 C27,S,False,0 female,60,1,4,263.0,1,C23 C25 C27,S,False,1 female,30,0,0,56.9292,1,E36,C,True,1 male,,0,0,26.55,1,D34,S,True,0 male,50,2,0,133.65,1,?,S,False,1 male,43,1,0,27.7208,1,D40,C,False,1 female,,1,0,133.65,1,?,S,False,1 female,22,0,2,49.5,1,B39,C,False,1 male,60,1,1,79.2,1,B41,C,False,1 female,48,1,1,79.2,1,B41,C,False,1 male,,0,0,0.0,1,B102,S,True,0 male,37,1,0,53.1,1,C123,S,False,0 female,35,1,0,53.1,1,C123,S,False,1 male,47,0,0,38.5,1,E63,S,True,0 female,35,0,0,211.5,1,C130,C,True,1 female,22,0,1,59.4,1,?,C,False,1 female,45,0,1,59.4,1,?,C,False,1 male,24,0,0,79.2,1,B86,C,True,0 male,49,1,0,89.1042,1,C92,C,False,1 female,,1,0,89.1042,1,C92,C,False,1 male,71,0,0,34.6542,1,A5,C,True,0 
male,53,0,0,28.5,1,C51,C,True,1 female,19,0,0,30.0,1,B42,S,True,1 male,38,0,1,153.4625,1,C91,S,False,0 female,58,0,1,153.4625,1,C125,S,False,1 male,23,0,1,63.3583,1,D10 D12,C,False,1 female,45,0,1,63.3583,1,D10 D12,C,False,1 male,46,0,0,79.2,1,B82 B84,C,True,0 male,25,1,0,55.4417,1,E50,C,False,1 female,25,1,0,55.4417,1,E50,C,False,1 male,48,1,0,76.7292,1,D33,C,False,1 female,49,1,0,76.7292,1,D33,C,False,1 male,,0,0,42.4,1,?,S,True,0 male,45,1,0,83.475,1,C83,S,False,0 female,35,1,0,83.475,1,C83,S,False,1 male,40,0,0,0.0,1,B94,S,True,0 male,27,0,0,76.7292,1,D49,C,True,1 male,,0,0,30.0,1,D45,S,True,1 female,24,0,0,83.1583,1,C54,C,True,1 male,55,1,1,93.5,1,B69,S,False,0 female,52,1,1,93.5,1,B69,S,False,1 male,42,0,0,42.5,1,B11,S,True,0 male,,0,0,51.8625,1,E46,S,True,0 male,55,0,0,50.0,1,C39,S,True,0 female,16,0,1,57.9792,1,B18,C,False,1 female,44,0,1,57.9792,1,B18,C,False,1 female,51,1,0,77.9583,1,D11,S,False,1 male,42,1,0,52.0,1,?,S,False,0 female,35,1,0,52.0,1,?,S,False,1 male,35,0,0,26.55,1,?,C,True,1 male,38,1,0,90.0,1,C93,S,False,1 male,,0,0,30.6958,1,?,C,True,0 female,35,1,0,90.0,1,C93,S,False,1 female,38,0,0,80.0,1,B28,?,True,1 female,50,0,0,28.7125,1,C49,C,True,0 male,49,0,0,0.0,1,B52 B54 B56,S,True,1 male,46,0,0,26.0,1,?,S,True,0 male,50,0,0,26.0,1,E60,S,True,0 male,32.5,0,0,211.5,1,C132,C,True,0 male,58,0,0,29.7,1,B37,C,True,0 male,41,1,0,51.8625,1,D21,S,False,0 female,,1,0,51.8625,1,D21,S,False,1 male,42,1,0,52.5542,1,D19,S,False,1 female,45,1,0,52.5542,1,D19,S,False,1 male,,0,0,26.55,1,C124,S,True,0 female,39,0,0,211.3375,1,?,S,True,1 female,49,0,0,25.9292,1,D17,S,True,1 female,30,0,0,106.425,1,?,C,True,1 male,35,0,0,512.3292,1,B101,C,True,1 male,,0,0,27.7208,1,?,C,True,0 male,42,0,0,26.55,1,?,S,True,0 female,55,0,0,27.7208,1,?,C,True,1 female,16,0,1,39.4,1,D28,S,False,1 female,51,0,1,39.4,1,D28,S,False,1 male,29,0,0,30.0,1,D6,S,True,0 female,21,0,0,77.9583,1,D9,S,True,1 male,30,0,0,45.5,1,?,S,True,0 female,58,0,0,146.5208,1,B80,C,True,1 
female,15,0,1,211.3375,1,B5,S,False,1 male,30,0,0,26.0,1,C106,S,True,0 female,16,0,0,86.5,1,B79,S,True,1 male,,0,0,29.7,1,C47,C,True,1 male,19,1,0,53.1,1,D30,S,False,0 female,18,1,0,53.1,1,D30,S,False,1 female,24,0,0,49.5042,1,C90,C,True,1 male,46,0,0,75.2417,1,C6,C,True,0 male,54,0,0,51.8625,1,E46,S,True,0 male,36,0,0,26.2875,1,E25,S,True,1 male,28,1,0,82.1708,1,?,C,False,0 female,,1,0,82.1708,1,?,C,False,1 male,65,0,0,26.55,1,E38,S,True,0 male,44,2,0,90.0,1,C78,Q,False,0 female,33,1,0,90.0,1,C78,Q,False,1 female,37,1,0,90.0,1,C78,Q,False,1 male,30,1,0,57.75,1,C78,C,False,1 male,55,0,0,30.5,1,C30,S,True,0 male,47,0,0,42.4,1,?,S,True,0 male,37,0,1,29.7,1,C118,C,False,0 female,31,1,0,113.275,1,D36,C,False,1 female,23,1,0,113.275,1,D36,C,False,1 male,58,0,2,113.275,1,D48,C,False,0 female,19,0,2,26.2833,1,D47,S,False,1 male,64,0,0,26.0,1,?,S,True,0 female,39,0,0,108.9,1,C105,C,True,1 male,,0,0,25.7417,1,?,C,True,1 female,22,0,1,61.9792,1,B36,C,False,1 male,65,0,1,61.9792,1,B30,C,False,0 male,28.5,0,0,27.7208,1,D43,C,True,0 male,,0,0,0.0,1,?,S,True,0 male,45.5,0,0,28.5,1,C124,S,True,0 male,23,0,0,93.5,1,B24,S,True,0 male,29,1,0,66.6,1,C2,S,False,0 female,22,1,0,66.6,1,C2,S,False,1 male,18,1,0,108.9,1,C65,C,False,0 female,17,1,0,108.9,1,C65,C,False,1 female,30,0,0,93.5,1,B73,S,True,1 male,52,0,0,30.5,1,C104,S,True,1 male,47,0,0,52.0,1,C110,S,True,0 female,56,0,1,83.1583,1,C50,C,False,1 male,38,0,0,0.0,1,?,S,True,0 male,,0,0,39.6,1,?,S,True,1 male,22,0,0,135.6333,1,?,C,True,0 male,,0,0,227.525,1,?,C,True,0 female,43,0,1,211.3375,1,B3,S,False,1 male,31,0,0,50.4958,1,A24,S,True,0 male,45,0,0,26.55,1,?,S,True,1 male,,0,0,50.0,1,A32,S,True,0 female,33,0,0,27.7208,1,A11,C,True,1 male,46,0,0,79.2,1,?,C,True,0 male,36,0,0,40.125,1,A10,C,True,0 female,33,0,0,86.5,1,B77,S,True,1 male,55,1,0,59.4,1,?,C,False,0 female,54,1,0,59.4,1,?,C,False,1 male,33,0,0,26.55,1,?,S,True,0 male,13,2,2,262.375,1,B57 B59 B63 B66,C,False,1 female,18,2,2,262.375,1,B57 B59 B63 B66,C,False,1 
female,21,2,2,262.375,1,B57 B59 B63 B66,C,False,1 male,61,1,3,262.375,1,B57 B59 B63 B66,C,False,0 female,48,1,3,262.375,1,B57 B59 B63 B66,C,False,1 male,,0,0,30.5,1,C106,S,True,1 female,24,0,0,69.3,1,B35,C,True,1 male,,0,0,26.0,1,?,S,True,1 female,35,1,0,57.75,1,C28,C,False,1 female,30,0,0,31.0,1,?,C,True,1 male,34,0,0,26.55,1,?,S,True,1 female,40,0,0,153.4625,1,C125,S,True,1 male,35,0,0,26.2875,1,E24,S,True,1 male,50,1,0,55.9,1,E44,S,False,0 female,39,1,0,55.9,1,E44,S,False,1 male,56,0,0,35.5,1,A26,C,True,1 male,28,0,0,35.5,1,A6,S,True,1 male,56,0,0,26.55,1,?,S,True,0 male,56,0,0,30.6958,1,A7,C,True,0 male,24,1,0,60.0,1,C31,S,False,0 male,,0,0,26.0,1,A19,S,True,0 female,18,1,0,60.0,1,C31,S,False,1 male,24,1,0,82.2667,1,B45,S,False,1 female,23,1,0,82.2667,1,B45,S,False,1 male,6,0,2,134.5,1,E34,C,False,1 male,45,1,1,134.5,1,E34,C,False,1 female,40,1,1,134.5,1,E34,C,False,1 male,57,1,0,146.5208,1,B78,C,False,0 female,,1,0,146.5208,1,B78,C,False,1 male,32,0,0,30.5,1,B50,C,True,1 male,62,0,0,26.55,1,C87,S,True,0 male,54,1,0,55.4417,1,C116,C,False,1 female,43,1,0,55.4417,1,C116,C,False,1 female,52,1,0,78.2667,1,D20,C,False,1 male,,0,0,27.7208,1,?,C,True,0 female,62,0,0,80.0,1,B28,?,True,1 male,67,1,0,221.7792,1,C55 C57,S,False,0 female,63,1,0,221.7792,1,C55 C57,S,False,0 male,61,0,0,32.3208,1,D50,S,True,0 female,48,0,0,25.9292,1,D17,S,True,1 female,18,0,2,79.65,1,E68,S,False,1 male,52,1,1,79.65,1,E67,S,False,0 female,39,1,1,79.65,1,E67,S,False,1 male,48,1,0,52.0,1,C126,S,False,1 female,,1,0,52.0,1,C126,S,False,1 male,49,1,1,110.8833,1,C68,C,False,0 male,17,0,2,110.8833,1,C70,C,False,1 female,39,1,1,110.8833,1,C68,C,False,1 female,,0,0,79.2,1,?,C,True,1 male,31,0,0,28.5375,1,C53,C,True,1 male,40,0,0,27.7208,1,?,C,True,0 male,61,0,0,33.5,1,B19,S,True,0 male,47,0,0,34.0208,1,D46,S,True,0 female,35,0,0,512.3292,1,?,C,True,1 male,64,1,0,75.25,1,D37,C,False,0 female,60,1,0,75.25,1,D37,C,False,1 male,60,0,0,26.55,1,?,S,True,0 male,54,0,1,77.2875,1,D26,S,False,0 
male,21,0,1,77.2875,1,D26,S,False,0 female,55,0,0,135.6333,1,C32,C,True,1 female,31,0,2,164.8667,1,C7,S,False,1 male,57,1,1,164.8667,1,?,S,False,0 female,45,1,1,164.8667,1,?,S,False,1 male,50,1,1,211.5,1,C80,C,False,0 male,27,0,2,211.5,1,C82,C,False,0 female,50,1,1,211.5,1,C80,C,False,1 female,21,0,0,26.55,1,?,S,True,1 male,51,0,1,61.3792,1,?,C,False,0 male,21,0,1,61.3792,1,?,C,False,1 male,,0,0,35.0,1,C128,S,True,0 female,31,0,0,134.5,1,E39 E41,C,True,1 male,,0,0,35.5,1,C52,S,True,1 male,62,0,0,26.55,1,?,S,True,0 female,36,0,0,135.6333,1,C32,C,True,1 male,30,1,0,24.0,2,?,C,False,0 female,28,1,0,24.0,2,?,C,False,1 male,30,0,0,13.0,2,?,S,True,0 male,18,0,0,11.5,2,?,S,True,0 male,25,0,0,10.5,2,?,S,True,0 male,34,1,0,26.0,2,?,S,False,0 female,36,1,0,26.0,2,?,S,False,1 male,57,0,0,13.0,2,?,S,True,0 male,18,0,0,11.5,2,?,S,True,0 male,23,0,0,10.5,2,?,S,True,0 female,36,0,0,13.0,2,D,S,True,1 male,28,0,0,10.5,2,?,S,True,0 male,51,0,0,12.525,2,?,S,True,0 male,32,1,0,26.0,2,?,S,False,1 female,19,1,0,26.0,2,?,S,False,1 male,28,0,0,26.0,2,?,S,True,0 male,1,2,1,39.0,2,F4,S,False,1 female,4,2,1,39.0,2,F4,S,False,1 female,12,2,1,39.0,2,F4,S,False,1 female,36,0,3,39.0,2,F4,S,False,1 male,34,0,0,13.0,2,D56,S,True,1 female,19,0,0,13.0,2,?,S,True,1 male,23,0,0,13.0,2,?,S,True,0 male,26,0,0,13.0,2,?,S,True,0 male,42,0,0,13.0,2,?,S,True,0 male,27,0,0,13.0,2,?,S,True,0 female,24,0,0,13.0,2,F33,S,True,1 female,15,0,2,39.0,2,?,S,False,1 male,60,1,1,39.0,2,?,S,False,0 female,40,1,1,39.0,2,?,S,False,1 female,20,1,0,26.0,2,?,S,False,1 male,25,1,0,26.0,2,?,S,False,0 female,36,0,0,13.0,2,?,S,True,1 male,25,0,0,13.0,2,?,S,True,0 male,42,0,0,13.0,2,?,S,True,0 female,42,0,0,13.0,2,?,S,True,1 male,0.8333,0,2,29.0,2,?,S,False,1 male,26,1,1,29.0,2,?,S,False,1 female,22,1,1,29.0,2,?,S,False,1 female,35,0,0,21.0,2,?,S,True,1 male,,0,0,0.0,2,?,S,True,0 male,19,0,0,13.0,2,?,S,True,0 female,44,1,0,26.0,2,?,S,False,0 male,54,1,0,26.0,2,?,S,False,0 male,52,0,0,13.5,2,?,S,True,0 
male,37,1,0,26.0,2,?,S,False,0 female,29,1,0,26.0,2,?,S,False,0 female,25,1,1,30.0,2,?,S,False,1 female,45,0,2,30.0,2,?,S,False,1 male,29,1,0,26.0,2,?,S,False,0 female,28,1,0,26.0,2,?,S,False,1 male,29,0,0,10.5,2,?,S,True,0 male,28,0,0,13.0,2,?,S,True,0 male,24,0,0,10.5,2,?,S,True,1 female,8,0,2,26.25,2,?,S,False,1 male,31,1,1,26.25,2,?,S,False,0 female,31,1,1,26.25,2,?,S,False,1 female,22,0,0,10.5,2,F33,S,True,1 female,30,0,0,13.0,2,?,S,True,0 female,,0,0,21.0,2,?,S,True,0 male,21,0,0,11.5,2,?,S,True,0 male,,0,0,0.0,2,?,S,True,0 male,8,1,1,36.75,2,?,S,False,1 male,18,0,0,73.5,2,?,S,True,0 female,48,0,2,36.75,2,?,S,False,1 female,28,0,0,13.0,2,?,S,True,1 male,32,0,0,13.0,2,?,S,True,0 male,17,0,0,73.5,2,?,S,True,0 male,29,1,0,27.7208,2,?,C,False,0 female,24,1,0,27.7208,2,?,C,False,1 male,25,0,0,31.5,2,?,S,True,0 male,18,0,0,73.5,2,?,S,True,0 female,18,0,1,23.0,2,?,S,False,1 female,34,0,1,23.0,2,?,S,False,1 male,54,0,0,26.0,2,?,S,True,0 male,8,0,2,32.5,2,?,S,False,1 male,42,1,1,32.5,2,?,S,False,0 female,34,1,1,32.5,2,?,S,False,1 female,27,1,0,13.8583,2,?,C,False,1 female,30,1,0,13.8583,2,?,C,False,1 male,23,0,0,13.0,2,?,S,True,0 male,21,0,0,13.0,2,?,S,True,0 male,18,0,0,13.0,2,?,S,True,0 male,40,1,0,26.0,2,?,S,False,0 female,29,1,0,26.0,2,?,S,False,1 male,18,0,0,10.5,2,?,S,True,0 male,36,0,0,13.0,2,?,S,True,0 male,,0,0,0.0,2,?,S,True,0 female,38,0,0,13.0,2,?,S,True,0 male,35,0,0,26.0,2,?,S,True,0 male,38,1,0,21.0,2,?,S,False,0 male,34,1,0,21.0,2,?,S,False,0 female,34,0,0,13.0,2,?,S,True,1 male,16,0,0,26.0,2,?,S,True,0 male,26,0,0,10.5,2,?,S,True,0 male,47,0,0,10.5,2,?,S,True,0 male,21,1,0,11.5,2,?,S,False,0 male,21,1,0,11.5,2,?,S,False,0 male,24,0,0,13.5,2,?,S,True,0 male,24,0,0,13.0,2,?,S,True,0 male,34,0,0,13.0,2,?,S,True,0 male,30,0,0,13.0,2,?,S,True,0 male,52,0,0,13.0,2,?,S,True,0 male,30,0,0,13.0,2,?,S,True,0 male,0.6667,1,1,14.5,2,?,S,False,1 female,24,0,2,14.5,2,?,S,False,1 male,44,0,0,13.0,2,?,S,True,0 female,6,0,1,33.0,2,?,S,False,1 
male,28,0,1,33.0,2,?,S,False,0 male,62,0,0,10.5,2,?,S,True,1 male,30,0,0,10.5,2,?,S,True,0 female,7,0,2,26.25,2,?,S,False,1 male,43,1,1,26.25,2,?,S,False,0 female,45,1,1,26.25,2,?,S,False,1 female,24,1,2,65.0,2,?,S,False,1 female,24,1,2,65.0,2,?,S,False,1 male,49,1,2,65.0,2,?,S,False,0 female,48,1,2,65.0,2,?,S,False,1 female,55,0,0,16.0,2,?,S,True,1 male,24,2,0,73.5,2,?,S,False,0 male,32,2,0,73.5,2,?,S,False,0 male,21,2,0,73.5,2,?,S,False,0 female,18,1,1,13.0,2,?,S,False,0 female,20,2,1,23.0,2,?,S,False,1 male,23,2,1,11.5,2,?,S,False,0 male,36,0,0,13.0,2,?,S,True,0 female,54,1,3,23.0,2,?,S,False,1 male,50,0,0,13.0,2,?,S,True,0 male,44,1,0,26.0,2,?,S,False,0 female,29,1,0,26.0,2,?,S,False,1 male,21,0,0,73.5,2,?,S,True,0 male,42,0,0,13.0,2,?,S,True,1 male,63,1,0,26.0,2,?,S,False,0 female,60,1,0,26.0,2,?,S,False,0 male,33,0,0,12.275,2,?,S,True,0 female,17,0,0,10.5,2,?,S,True,1 male,42,1,0,27.0,2,?,S,False,0 female,24,2,1,27.0,2,?,S,False,1 male,47,0,0,15.0,2,?,S,True,0 male,24,2,0,31.5,2,?,S,False,0 male,22,2,0,31.5,2,?,S,False,0 male,32,0,0,10.5,2,?,S,True,0 female,23,0,0,13.7917,2,D,C,True,1 male,34,1,0,26.0,2,?,S,False,0 female,24,1,0,26.0,2,?,S,False,1 female,22,0,0,21.0,2,?,S,True,0 female,,0,0,12.35,2,E101,Q,True,1 male,35,0,0,12.35,2,?,Q,True,0 female,45,0,0,13.5,2,?,S,True,1 male,57,0,0,12.35,2,?,Q,True,0 male,,0,0,0.0,2,?,S,True,0 male,31,0,0,10.5,2,?,S,True,0 female,26,1,1,26.0,2,?,S,False,0 male,30,1,1,26.0,2,?,S,False,0 male,,0,0,10.7083,2,?,Q,True,0 female,1,1,2,41.5792,2,?,C,False,1 female,3,1,2,41.5792,2,?,C,False,1 male,25,1,2,41.5792,2,?,C,False,0 female,22,1,2,41.5792,2,?,C,False,1 female,17,0,0,12.0,2,?,C,True,1 female,,0,0,33.0,2,?,S,True,1 female,34,0,0,10.5,2,F33,S,True,1 male,36,0,0,12.875,2,D,C,True,0 male,24,0,0,10.5,2,?,S,True,0 male,61,0,0,12.35,2,?,Q,True,0 male,50,1,0,26.0,2,?,S,False,0 female,42,1,0,26.0,2,?,S,False,1 female,57,0,0,10.5,2,E77,S,True,0 male,,0,0,15.0458,2,D,C,True,0 male,1,0,2,37.0042,2,?,C,False,1 
male,31,1,1,37.0042,2,?,C,False,0 female,24,1,1,37.0042,2,?,C,False,1 male,,0,0,15.5792,2,?,C,True,0 male,30,0,0,13.0,2,?,S,True,0 male,40,0,0,16.0,2,?,S,True,0 male,32,0,0,13.5,2,?,S,True,0 male,30,0,0,13.0,2,?,S,True,0 male,46,0,0,26.0,2,?,S,True,0 female,13,0,1,19.5,2,?,S,False,1 female,41,0,1,19.5,2,?,S,False,1 male,19,0,0,10.5,2,?,S,True,1 male,39,0,0,13.0,2,?,S,True,0 male,48,0,0,13.0,2,?,S,True,0 male,70,0,0,10.5,2,?,S,True,0 male,27,0,0,13.0,2,?,S,True,0 male,54,0,0,14.0,2,?,S,True,0 male,39,0,0,26.0,2,?,S,True,0 male,16,0,0,10.5,2,?,S,True,0 male,62,0,0,9.6875,2,?,Q,True,0 male,32.5,1,0,30.0708,2,?,C,False,0 female,14,1,0,30.0708,2,?,C,False,1 male,2,1,1,26.0,2,F2,S,False,1 male,3,1,1,26.0,2,F2,S,False,1 male,36.5,0,2,26.0,2,F2,S,False,0 male,26,0,0,13.0,2,F2,S,True,0 male,19,1,1,36.75,2,?,S,False,0 male,28,0,0,13.5,2,?,S,True,0 male,20,0,0,13.8625,2,D38,C,True,1 female,29,0,0,10.5,2,F33,S,True,1 male,39,0,0,13.0,2,?,S,True,0 male,22,0,0,10.5,2,?,S,True,1 male,,0,0,13.8625,2,?,C,True,1 male,23,0,0,10.5,2,?,S,True,0 male,29,0,0,13.8583,2,?,C,True,1 male,28,0,0,10.5,2,?,S,True,0 male,,0,0,0.0,2,?,S,True,0 female,50,0,1,26.0,2,?,S,False,1 male,19,0,0,10.5,2,?,S,True,0 male,,0,0,15.05,2,?,C,True,0 male,41,0,0,13.0,2,?,S,True,0 female,21,0,1,21.0,2,?,S,False,1 female,19,0,0,26.0,2,?,S,True,1 male,43,0,1,21.0,2,?,S,False,0 female,32,0,0,13.0,2,?,S,True,1 male,34,0,0,13.0,2,?,S,True,0 male,30,0,0,12.7375,2,?,C,True,1 male,27,0,0,15.0333,2,?,C,True,0 female,2,1,1,26.0,2,?,S,False,1 female,8,1,1,26.0,2,?,S,False,1 female,33,0,2,26.0,2,?,S,False,1 male,36,0,0,10.5,2,?,S,True,0 male,34,1,0,21.0,2,?,S,False,0 female,30,3,0,21.0,2,?,S,False,1 female,28,0,0,13.0,2,?,S,True,1 male,23,0,0,15.0458,2,?,C,True,0 male,0.8333,1,1,18.75,2,?,S,False,1 male,3,1,1,18.75,2,?,S,False,1 female,24,2,3,18.75,2,?,S,False,1 female,50,0,0,10.5,2,?,S,True,1 male,19,0,0,10.5,2,?,S,True,0 female,21,0,0,10.5,2,?,S,True,1 male,26,0,0,13.0,2,?,S,True,0 male,25,0,0,13.0,2,?,S,True,0 
male,27,0,0,26.0,2,?,S,True,0 female,25,0,1,26.0,2,?,S,False,1 female,18,0,2,13.0,2,?,S,False,1 female,20,0,0,36.75,2,?,S,True,1 female,30,0,0,13.0,2,?,S,True,1 male,59,0,0,13.5,2,?,S,True,0 female,30,0,0,12.35,2,?,Q,True,1 male,35,0,0,10.5,2,?,S,True,0 female,40,0,0,13.0,2,?,S,True,1 male,25,0,0,13.0,2,?,S,True,0 male,41,0,0,15.0458,2,?,C,True,0 male,25,0,0,10.5,2,?,S,True,0 male,18.5,0,0,13.0,2,F,S,True,0 male,14,0,0,65.0,2,?,S,True,0 female,50,0,0,10.5,2,?,S,True,1 male,23,0,0,13.0,2,?,S,True,0 female,28,0,0,12.65,2,?,S,True,1 female,27,0,0,10.5,2,E101,S,True,1 male,29,1,0,21.0,2,?,S,False,0 female,27,1,0,21.0,2,?,S,False,0 male,40,0,0,13.0,2,?,S,True,0 female,31,0,0,21.0,2,?,S,True,1 male,30,1,0,21.0,2,?,S,False,0 male,23,1,0,10.5,2,?,S,False,0 female,31,0,0,21.0,2,?,S,True,1 male,,0,0,0.0,2,?,S,True,0 female,12,0,0,15.75,2,?,S,True,1 female,40,0,0,15.75,2,?,S,True,1 female,32.5,0,0,13.0,2,E101,S,True,1 male,27,1,0,26.0,2,?,S,False,0 female,29,1,0,26.0,2,?,S,False,1 male,2,1,1,23.0,2,?,S,False,1 female,4,1,1,23.0,2,?,S,False,1 female,29,0,2,23.0,2,?,S,False,1 female,0.9167,1,2,27.75,2,?,S,False,1 female,5,1,2,27.75,2,?,S,False,1 male,36,1,2,27.75,2,?,S,False,0 female,33,1,2,27.75,2,?,S,False,1 male,66,0,0,10.5,2,?,S,True,0 male,,0,0,12.875,2,?,S,True,0 male,31,0,0,13.0,2,?,S,True,1 male,,0,0,13.0,2,?,S,True,1 female,26,0,0,13.5,2,?,S,True,1 female,24,0,0,13.0,2,?,S,True,0 male,42,0,0,7.55,3,?,S,True,0 male,13,0,2,20.25,3,?,S,False,0 male,16,1,1,20.25,3,?,S,False,0 female,35,1,1,20.25,3,?,S,False,1 female,16,0,0,7.65,3,?,S,True,1 male,25,0,0,7.65,3,F G63,S,True,1 male,20,0,0,7.925,3,?,S,True,1 female,18,0,0,7.2292,3,?,C,True,1 male,30,0,0,7.25,3,?,S,True,0 male,26,0,0,8.05,3,?,S,True,0 female,40,1,0,9.475,3,?,S,False,0 male,0.8333,0,1,9.35,3,?,S,False,1 female,18,0,1,9.35,3,?,S,False,1 male,26,0,0,18.7875,3,?,C,True,1 male,26,0,0,7.8875,3,?,S,True,0 male,20,0,0,7.925,3,?,S,True,0 male,24,0,0,7.05,3,?,S,True,0 male,25,0,0,7.05,3,?,S,True,0 
male,35,0,0,8.05,3,?,S,True,0 male,18,0,0,8.3,3,?,S,True,0 male,32,0,0,22.525,3,?,S,True,0 female,19,1,0,7.8542,3,?,S,False,1 male,4,4,2,31.275,3,?,S,False,0 female,6,4,2,31.275,3,?,S,False,0 female,2,4,2,31.275,3,?,S,False,0 female,17,4,2,7.925,3,?,S,False,1 female,38,4,2,7.775,3,?,S,False,0 female,9,4,2,31.275,3,?,S,False,0 female,11,4,2,31.275,3,?,S,False,0 male,39,1,5,31.275,3,?,S,False,0 male,27,0,0,7.7958,3,?,S,True,1 male,26,0,0,7.775,3,?,S,True,0 female,39,1,5,31.275,3,?,S,False,0 male,20,0,0,7.8542,3,?,S,True,0 male,26,0,0,7.8958,3,?,S,True,0 male,25,1,0,17.8,3,?,S,False,0 female,18,1,0,17.8,3,?,S,False,0 male,24,0,0,7.775,3,?,S,True,0 male,35,0,0,7.05,3,?,S,True,0 male,5,4,2,31.3875,3,?,S,False,0 male,9,4,2,31.3875,3,?,S,False,0 male,3,4,2,31.3875,3,?,S,False,1 male,13,4,2,31.3875,3,?,S,False,0 female,5,4,2,31.3875,3,?,S,False,1 male,40,1,5,31.3875,3,?,S,False,0 male,23,0,0,7.7958,3,?,S,True,1 female,38,1,5,31.3875,3,?,S,False,1 female,45,0,0,7.225,3,?,C,True,1 male,21,0,0,7.225,3,?,C,True,0 male,23,0,0,7.05,3,?,S,True,0 female,17,0,0,14.4583,3,?,C,True,0 male,30,0,0,7.225,3,?,C,True,0 male,23,0,0,7.8542,3,?,S,True,0 female,13,0,0,7.2292,3,?,C,True,1 male,20,0,0,7.225,3,?,C,True,0 male,32,1,0,15.85,3,?,S,False,0 female,33,3,0,15.85,3,?,S,False,1 female,0.75,2,1,19.2583,3,?,C,False,1 female,0.75,2,1,19.2583,3,?,C,False,1 female,5,2,1,19.2583,3,?,C,False,1 female,24,0,3,19.2583,3,?,C,False,1 female,18,0,0,8.05,3,?,S,True,1 male,40,0,0,7.225,3,?,C,True,0 male,26,0,0,7.8958,3,?,S,True,0 male,20,0,0,7.2292,3,?,C,True,1 female,18,0,1,14.4542,3,?,C,False,0 female,45,0,1,14.4542,3,?,C,False,0 female,27,0,0,7.8792,3,?,Q,True,0 male,22,0,0,8.05,3,?,S,True,0 male,19,0,0,8.05,3,?,S,True,0 male,26,0,0,7.775,3,?,S,True,0 male,22,0,0,9.35,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,0 male,20,0,0,4.0125,3,?,C,True,0 male,32,0,0,56.4958,3,?,S,True,1 male,21,0,0,7.775,3,?,S,True,0 male,18,0,0,7.75,3,?,S,True,0 male,26,0,0,7.8958,3,?,S,True,0 male,6,1,1,15.2458,3,?,C,False,0 
female,9,1,1,15.2458,3,?,C,False,0 male,,0,0,7.225,3,?,C,True,0 female,,0,2,15.2458,3,?,C,False,0 female,,0,2,7.75,3,?,Q,False,0 male,40,1,1,15.5,3,?,Q,False,0 female,32,1,1,15.5,3,?,Q,False,0 male,21,0,0,16.1,3,?,S,True,0 female,22,0,0,7.725,3,?,Q,True,1 female,20,0,0,7.8542,3,?,S,True,0 male,29,1,0,7.0458,3,?,S,False,0 male,22,1,0,7.25,3,?,S,False,0 male,22,0,0,7.7958,3,?,S,True,0 male,35,0,0,8.05,3,?,S,True,0 female,18.5,0,0,7.2833,3,?,Q,True,0 male,21,0,0,7.8208,3,?,Q,True,1 male,19,0,0,6.75,3,?,Q,True,0 female,18,0,0,7.8792,3,?,Q,True,0 female,21,0,0,8.6625,3,?,S,True,0 female,30,0,0,8.6625,3,?,S,True,0 male,18,0,0,8.6625,3,?,S,True,0 male,38,0,0,8.6625,3,?,S,True,0 male,17,0,0,8.6625,3,?,S,True,0 male,17,0,0,8.6625,3,?,S,True,0 female,21,0,0,7.75,3,?,Q,True,0 male,21,0,0,7.75,3,?,Q,True,0 male,21,0,0,8.05,3,?,S,True,0 male,,1,0,14.4583,3,?,C,False,0 female,,1,0,14.4583,3,?,C,False,0 male,28,0,0,7.7958,3,?,S,True,0 male,24,0,0,7.8542,3,?,S,True,0 female,16,0,0,7.75,3,?,Q,True,1 female,37,0,0,7.75,3,?,Q,True,0 male,28,0,0,7.25,3,?,S,True,0 male,24,0,0,8.05,3,?,S,True,0 male,21,0,0,7.7333,3,?,Q,True,0 male,32,0,0,56.4958,3,?,S,True,1 male,29,0,0,8.05,3,?,S,True,0 male,26,1,0,14.4542,3,?,C,False,0 male,18,1,0,14.4542,3,?,C,False,0 male,20,0,0,7.05,3,?,S,True,0 male,18,0,0,8.05,3,?,S,True,1 male,24,0,0,7.25,3,?,Q,True,0 male,36,0,0,7.4958,3,?,S,True,0 male,24,0,0,7.4958,3,?,S,True,0 male,31,0,0,7.7333,3,?,Q,True,0 male,31,0,0,7.75,3,?,Q,True,0 female,22,0,0,7.75,3,?,Q,True,1 female,30,0,0,7.6292,3,?,Q,True,0 male,70.5,0,0,7.75,3,?,Q,True,0 male,43,0,0,8.05,3,?,S,True,0 male,35,0,0,7.8958,3,?,S,True,0 male,27,0,0,7.8958,3,?,S,True,0 male,19,0,0,7.8958,3,?,S,True,0 male,30,0,0,8.05,3,?,S,True,0 male,9,1,1,15.9,3,?,S,False,1 male,3,1,1,15.9,3,?,S,False,1 female,36,0,2,15.9,3,?,S,False,1 male,59,0,0,7.25,3,?,S,True,0 male,19,0,0,8.1583,3,?,S,True,0 female,17,0,1,16.1,3,?,S,False,1 male,44,0,1,16.1,3,?,S,False,0 male,17,0,0,8.6625,3,?,S,True,0 
male,22.5,0,0,7.225,3,?,C,True,0 male,45,0,0,8.05,3,?,S,True,1 female,22,0,0,10.5167,3,?,S,True,0 male,19,0,0,10.1708,3,?,S,True,0 female,30,0,0,6.95,3,?,Q,True,1 male,29,0,0,7.75,3,?,Q,True,1 male,0.3333,0,2,14.4,3,?,S,False,0 male,34,1,1,14.4,3,?,S,False,0 female,28,1,1,14.4,3,?,S,False,0 male,27,0,0,7.8958,3,?,S,True,0 male,25,0,0,7.8958,3,?,S,True,0 male,24,2,0,24.15,3,?,S,False,0 male,22,0,0,8.05,3,?,S,True,0 male,21,2,0,24.15,3,?,S,False,0 male,17,2,0,8.05,3,?,S,False,0 male,,1,0,16.1,3,?,S,False,0 female,,1,0,16.1,3,?,S,False,1 male,36.5,1,0,17.4,3,?,S,False,1 female,36,1,0,17.4,3,?,S,False,1 male,30,0,0,9.5,3,?,S,True,1 male,16,0,0,9.5,3,?,S,True,0 male,1,1,2,20.575,3,?,S,False,1 female,0.1667,1,2,20.575,3,?,S,False,1 male,26,1,2,20.575,3,?,S,False,0 female,33,1,2,20.575,3,?,S,False,1 male,25,0,0,7.8958,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,22,0,0,7.25,3,?,S,True,0 male,36,0,0,7.25,3,?,S,True,0 female,19,0,0,7.8792,3,?,Q,True,1 male,17,0,0,7.8958,3,?,S,True,0 male,42,0,0,8.6625,3,?,S,True,0 male,43,0,0,7.8958,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,0 male,32,0,0,7.75,3,?,Q,True,0 male,19,0,0,8.05,3,?,S,True,1 female,30,0,0,12.475,3,?,S,True,1 female,24,0,0,7.75,3,?,Q,True,0 female,23,0,0,8.05,3,?,S,True,1 male,33,0,0,7.8958,3,?,C,True,0 male,65,0,0,7.75,3,?,Q,True,0 male,24,0,0,7.55,3,?,S,True,1 male,23,1,0,13.9,3,?,S,False,0 female,22,1,0,13.9,3,?,S,False,1 male,18,0,0,7.775,3,?,S,True,0 male,16,0,0,7.775,3,?,S,True,0 male,45,0,0,6.975,3,?,S,True,0 male,,0,0,7.225,3,?,C,True,0 male,39,0,2,7.2292,3,?,C,False,0 male,17,1,1,7.2292,3,?,C,False,0 male,15,1,1,7.2292,3,?,C,False,0 male,47,0,0,7.25,3,?,S,True,0 female,5,0,0,12.475,3,?,S,True,1 male,,0,0,7.225,3,?,C,True,0 male,40.5,0,0,15.1,3,?,S,True,0 male,40.5,0,0,7.75,3,?,Q,True,0 male,,0,0,7.05,3,?,S,True,1 male,18,0,0,7.7958,3,?,S,True,0 female,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,6.95,3,?,Q,True,0 male,26,0,0,7.8792,3,?,Q,True,0 
male,,0,0,7.75,3,?,Q,True,0 male,,0,0,56.4958,3,?,S,True,1 female,21,2,2,34.375,3,?,S,False,0 female,9,2,2,34.375,3,?,S,False,0 male,,0,0,8.05,3,?,S,True,0 male,18,2,2,34.375,3,?,S,False,0 male,16,1,3,34.375,3,?,S,False,0 female,48,1,3,34.375,3,?,S,False,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.25,3,?,S,True,0 male,25,0,0,7.7417,3,?,Q,True,0 male,,0,0,14.5,3,?,S,True,0 male,,0,0,7.8958,3,?,C,True,0 male,22,0,0,8.05,3,?,S,True,0 female,16,0,0,7.7333,3,?,Q,True,1 female,,0,0,7.75,3,?,Q,True,1 male,9,0,2,20.525,3,?,S,False,1 male,33,1,1,20.525,3,?,S,False,0 male,41,0,0,7.85,3,?,S,True,0 female,31,1,1,20.525,3,?,S,False,1 male,38,0,0,7.05,3,?,S,True,0 male,9,5,2,46.9,3,?,S,False,0 male,1,5,2,46.9,3,?,S,False,0 male,11,5,2,46.9,3,?,S,False,0 female,10,5,2,46.9,3,?,S,False,0 female,16,5,2,46.9,3,?,S,False,0 male,14,5,2,46.9,3,?,S,False,0 male,40,1,6,46.9,3,?,S,False,0 female,43,1,6,46.9,3,?,S,False,0 male,51,0,0,8.05,3,?,S,True,0 male,32,0,0,8.3625,3,?,S,True,0 male,,0,0,8.05,3,?,S,True,0 male,20,0,0,9.8458,3,?,S,True,0 male,37,2,0,7.925,3,?,S,False,0 male,28,2,0,7.925,3,?,S,False,0 male,19,0,0,7.775,3,?,S,True,0 female,24,0,0,8.85,3,?,S,True,0 female,17,0,0,7.7333,3,?,Q,True,0 male,,1,0,19.9667,3,?,S,False,0 male,,1,0,19.9667,3,?,S,False,0 male,28,1,0,15.85,3,?,S,False,0 female,24,1,0,15.85,3,?,S,False,1 male,20,0,0,9.5,3,?,S,True,0 male,23.5,0,0,7.2292,3,?,C,True,0 male,41,2,0,14.1083,3,?,S,False,0 male,26,1,0,7.8542,3,?,S,False,0 male,21,0,0,7.8542,3,?,S,True,0 female,45,1,0,14.1083,3,?,S,False,1 female,,0,0,7.55,3,?,S,True,0 male,25,0,0,7.25,3,?,S,True,0 male,,0,0,6.8583,3,?,Q,True,0 male,11,0,0,18.7875,3,?,C,True,0 female,,0,0,7.75,3,?,Q,True,1 male,27,0,0,6.975,3,?,S,True,1 male,,0,0,56.4958,3,?,S,True,1 female,18,0,0,6.75,3,?,Q,True,0 female,26,0,0,7.925,3,?,S,True,1 female,23,0,0,7.925,3,?,S,True,0 female,22,0,0,8.9625,3,?,S,True,1 male,28,0,0,7.8958,3,?,S,True,0 female,28,0,0,7.775,3,?,S,True,0 female,,0,0,7.75,3,?,Q,True,0 female,2,0,1,12.2875,3,?,S,False,1 
female,22,1,1,12.2875,3,?,S,False,1 male,43,0,0,6.45,3,?,S,True,0 male,28,0,0,22.525,3,?,S,True,0 female,27,0,0,7.925,3,?,S,True,1 male,,0,0,7.75,3,?,Q,True,0 female,,0,0,8.05,3,?,S,True,1 male,42,0,0,7.65,3,F G63,S,True,0 male,,0,0,7.8875,3,?,S,True,1 male,30,0,0,7.2292,3,?,C,True,0 male,,0,0,7.8958,3,?,S,True,0 female,27,1,0,7.925,3,?,S,False,0 female,25,1,0,7.925,3,?,S,False,0 male,,0,0,7.8958,3,?,S,True,0 male,29,0,0,7.8958,3,?,C,True,1 male,21,0,0,7.7958,3,?,S,True,1 male,,0,0,7.05,3,?,S,True,0 male,20,0,0,7.8542,3,?,S,True,0 male,48,0,0,7.8542,3,?,S,True,0 male,17,1,0,7.0542,3,?,S,False,0 female,,0,0,7.75,3,?,Q,True,1 male,,0,0,8.1125,3,?,S,True,1 male,34,0,0,6.4958,3,?,S,True,0 male,26,0,0,7.775,3,?,S,True,1 male,22,0,0,7.7958,3,?,S,True,0 male,33,0,0,8.6542,3,?,S,True,0 male,31,0,0,7.775,3,?,S,True,0 male,29,0,0,7.8542,3,?,S,True,0 male,4,1,1,11.1333,3,?,S,False,1 female,1,1,1,11.1333,3,?,S,False,1 male,49,0,0,0.0,3,?,S,True,0 male,33,0,0,7.775,3,?,S,True,0 male,19,0,0,0.0,3,?,S,True,0 female,27,0,2,11.1333,3,?,S,False,1 male,,1,2,23.45,3,?,S,False,0 female,,1,2,23.45,3,?,S,False,0 male,,1,2,23.45,3,?,S,False,0 female,,1,2,23.45,3,?,S,False,0 male,23,0,0,7.8958,3,?,S,True,0 male,32,0,0,7.8542,3,?,S,True,1 male,27,0,0,7.8542,3,?,S,True,0 female,20,1,0,9.825,3,?,S,False,0 female,21,1,0,9.825,3,?,S,False,0 male,32,0,0,7.925,3,?,S,True,1 male,17,0,0,7.125,3,?,S,True,0 male,21,0,0,8.4333,3,?,S,True,0 male,30,0,0,7.8958,3,?,S,True,0 male,21,0,0,7.7958,3,?,S,True,1 male,33,0,0,7.8542,3,?,S,True,0 male,22,0,0,7.5208,3,?,S,True,0 female,4,0,1,13.4167,3,?,C,False,1 male,39,0,1,13.4167,3,?,C,False,1 male,,0,0,7.2292,3,?,C,True,0 male,18.5,0,0,7.2292,3,?,C,True,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.25,3,?,S,True,0 female,,0,0,7.75,3,?,Q,True,1 female,,0,0,7.75,3,?,Q,True,1 male,34.5,0,0,7.8292,3,?,Q,True,0 male,44,0,0,8.05,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,1 male,,1,0,14.4542,3,?,C,False,0 female,,1,0,14.4542,3,?,C,False,0 male,,1,0,7.75,3,?,Q,False,0 
male,,1,0,7.75,3,?,Q,False,0 male,,0,0,7.7375,3,?,Q,True,0 female,22,2,0,8.6625,3,?,S,False,0 male,26,2,0,8.6625,3,?,S,False,0 female,4,0,2,22.025,3,?,S,False,1 male,29,3,1,22.025,3,?,S,False,1 female,26,1,1,22.025,3,?,S,False,1 female,1,1,1,12.1833,3,?,S,False,0 male,18,1,1,7.8542,3,?,S,False,0 female,36,0,2,12.1833,3,?,S,False,0 male,,0,0,7.8958,3,?,C,True,0 male,25,0,0,7.2292,3,F E57,C,True,1 male,,0,0,7.225,3,?,C,True,0 female,37,0,0,9.5875,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,56.4958,3,?,S,True,1 male,,0,0,56.4958,3,?,S,True,0 female,22,0,0,7.25,3,?,S,True,1 male,,0,0,7.75,3,?,Q,True,0 male,26,0,0,56.4958,3,?,S,True,1 male,29,0,0,9.4833,3,?,S,True,0 male,29,0,0,7.775,3,?,S,True,0 male,22,0,0,7.775,3,?,S,True,0 male,22,0,0,7.225,3,?,C,True,1 male,,3,1,25.4667,3,?,S,False,0 female,,3,1,25.4667,3,?,S,False,0 female,,3,1,25.4667,3,?,S,False,0 female,,3,1,25.4667,3,?,S,False,0 female,,0,4,25.4667,3,?,S,False,0 male,32,0,0,7.925,3,?,S,True,0 male,34.5,0,0,6.4375,3,?,C,True,0 female,,1,0,15.5,3,?,Q,False,0 male,,1,0,15.5,3,?,Q,False,0 male,36,0,0,0.0,3,?,S,True,0 male,39,0,0,24.15,3,?,S,True,0 male,24,0,0,9.5,3,?,S,True,0 female,25,0,0,7.775,3,?,S,True,0 female,45,0,0,7.75,3,?,S,True,0 male,36,1,0,15.55,3,?,S,False,0 female,30,1,0,15.55,3,?,S,False,0 male,20,1,0,7.925,3,?,S,False,1 male,,0,0,7.8792,3,?,Q,True,0 male,28,0,0,56.4958,3,?,S,True,0 male,,0,0,7.55,3,?,S,True,0 male,30,1,0,16.1,3,?,S,False,0 female,26,1,0,16.1,3,?,S,False,0 male,,0,0,7.8792,3,?,S,True,0 male,20.5,0,0,7.25,3,?,S,True,0 male,27,0,0,8.6625,3,?,S,True,1 male,51,0,0,7.0542,3,?,S,True,0 female,23,0,0,7.8542,3,?,S,True,1 male,32,0,0,7.5792,3,?,S,True,1 male,,0,0,7.8958,3,?,S,True,0 male,,0,0,7.55,3,?,S,True,0 female,,0,0,7.75,3,?,Q,True,1 male,24,0,0,7.1417,3,?,S,True,1 male,22,0,0,7.125,3,?,S,True,0 female,,0,0,7.8792,3,?,Q,True,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,8.05,3,?,S,True,0 male,29,0,0,7.925,3,?,S,True,0 male,,0,0,7.2292,3,?,C,True,1 female,30.5,0,0,7.75,3,?,Q,True,0 
female,,0,0,7.7375,3,?,Q,True,1 male,,0,0,7.2292,3,F E46,C,True,0 male,35,0,0,7.8958,3,?,C,True,0 male,33,0,0,7.8958,3,?,S,True,0 female,,0,0,7.225,3,?,C,True,1 male,,0,0,7.8958,3,?,C,True,0 female,,0,0,7.75,3,?,Q,True,1 male,,0,0,7.75,3,?,Q,True,1 female,,2,0,23.25,3,?,Q,False,1 female,,2,0,23.25,3,?,Q,False,1 male,,2,0,23.25,3,?,Q,False,1 female,,0,0,7.7875,3,?,Q,True,1 male,,0,0,15.5,3,?,Q,True,0 female,,0,0,7.8792,3,?,Q,True,1 female,15,0,0,8.0292,3,?,Q,True,1 female,35,0,0,7.75,3,?,Q,True,0 male,,0,0,7.75,3,?,Q,True,0 male,24,1,0,16.1,3,?,S,False,0 female,19,1,0,16.1,3,?,S,False,0 female,,0,0,7.75,3,?,Q,True,0 female,,0,0,8.05,3,?,S,True,0 female,,0,0,8.05,3,?,S,True,0 male,55.5,0,0,8.05,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,0 male,21,0,0,7.775,3,?,S,True,1 male,,0,0,8.05,3,?,S,True,0 male,24,0,0,7.8958,3,?,S,True,0 male,21,0,0,7.8958,3,?,S,True,0 male,28,0,0,7.8958,3,?,S,True,0 male,,0,0,7.8958,3,?,S,True,0 female,,0,0,7.8792,3,?,Q,True,1 male,25,0,0,7.65,3,F G73,S,True,0 male,6,0,1,12.475,3,E121,S,False,1 female,27,0,1,12.475,3,E121,S,False,1 male,,0,0,8.05,3,?,S,True,0 female,,1,0,24.15,3,?,Q,False,1 male,,1,0,24.15,3,?,Q,False,0 male,,0,0,8.4583,3,?,Q,True,0 male,34,0,0,8.05,3,?,S,True,0 male,,0,0,7.75,3,?,Q,True,0 male,,0,0,7.775,3,?,S,True,1 male,,1,1,15.2458,3,?,C,False,1 male,,1,1,15.2458,3,?,C,False,1 female,,0,2,15.2458,3,?,C,False,1 female,,0,0,7.2292,3,?,C,True,1 male,,0,0,8.05,3,?,S,True,0 female,,0,0,7.7333,3,?,Q,True,1 female,24,0,0,7.75,3,?,Q,True,1 male,,0,0,8.05,3,?,S,True,0 female,,1,0,15.5,3,?,Q,False,1 female,,1,0,15.5,3,?,Q,False,1 female,,0,0,15.5,3,?,Q,True,1 male,18,0,0,7.75,3,?,S,True,0 male,22,0,0,7.8958,3,?,S,True,0 ================================================ FILE: benchmark/experiments/__init__.py ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from benchmark.experiments import structured_data


def get_experiments(task_name):
    """Build the benchmark experiments registered for a task.

    # Arguments
        task_name: Name of the benchmark task. Only
            "structured_data_classification" is recognised here.

    # Returns
        A list holding newly constructed `Titanic`, `Iris` and `Wine`
        experiments (in that order) for the recognised task name, or
        `None` for any other value.
    """
    if task_name != "structured_data_classification":
        # Unrecognised task names fall through to None.
        return None

    experiments = []
    experiments.append(structured_data.Titanic())
    experiments.append(structured_data.Iris())
    experiments.append(structured_data.Wine())
    return experiments


# ================================================
# FILE: benchmark/experiments/experiment.py
# ================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
class Experiment(object):
    """Base class for a single benchmark experiment.

    Subclasses provide the data (`load_data`) and the model to search with
    (`get_auto_model`); this class times the fit and collects the metric.

    Args:
        name: String. Name of the experiment, used in the report.
        tmp_dir: String. Directory removed by `tear_down`. Subclasses pass it
            to the auto model as its working directory.
    """

    def __init__(self, name, tmp_dir="tmp_dir"):
        self.name = name
        self.tmp_dir = tmp_dir

    def get_auto_model(self):
        """Return the auto model to benchmark. Must be overridden."""
        raise NotImplementedError

    @staticmethod
    def load_data():
        """Return `(x_train, y_train), (x_test, y_test)`. Must be overridden."""
        raise NotImplementedError

    def run_once(self):
        """Fit and evaluate one fresh auto model.

        Returns:
            A tuple `(total_time, accuracy)`: the wall-clock seconds spent in
            `fit` only, and element 1 of the list returned by `evaluate`.
        """
        (x_train, y_train), (x_test, y_test) = self.load_data()
        auto_model = self.get_auto_model()

        start_time = timeit.default_timer()
        auto_model.fit(x_train, y_train)
        stop_time = timeit.default_timer()

        # NOTE(review): index 1 is presumably the first metric after the
        # loss; for regressors that is not an accuracy - confirm.
        accuracy = auto_model.evaluate(x_test, y_test)[1]
        total_time = stop_time - start_time

        return total_time, accuracy

    def run(self, repeat_times=1):
        """Run the experiment `repeat_times` times.

        Args:
            repeat_times: Int. Number of independent repetitions.

        Returns:
            A tuple `(total_times, metric_values)` of two lists with one
            entry per repetition.
        """
        total_times = []
        metric_values = []
        for _ in range(repeat_times):
            try:
                total_time, metric = self.run_once()
            finally:
                # Clean up even when the run fails so a crashed search does
                # not leave its working directory behind.
                self.tear_down()
            total_times.append(total_time)
            metric_values.append(metric)
        return total_times, metric_values

    def tear_down(self):
        """Remove `self.tmp_dir`; a missing directory is not an error."""
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
class ImageClassifierExperiment(experiment.Experiment):
    """Experiment whose search is driven by `ak.ImageClassifier`."""

    def get_auto_model(self):
        """Create the classifier: 10 trials, stored under `self.tmp_dir`."""
        search_options = dict(
            max_trials=10, directory=self.tmp_dir, overwrite=True
        )
        return ak.ImageClassifier(**search_options)


class MNIST(ImageClassifierExperiment):
    """Image classification benchmark on the Keras MNIST dataset."""

    def __init__(self):
        super().__init__(name="MNIST")

    @staticmethod
    def load_data():
        """Return whatever `keras.datasets.mnist.load_data()` returns."""
        mnist = datasets.mnist
        return mnist.load_data()


class CIFAR10(ImageClassifierExperiment):
    """Image classification benchmark on the Keras CIFAR10 dataset."""

    def __init__(self):
        super().__init__(name="CIFAR10")

    @staticmethod
    def load_data():
        """Return whatever `keras.datasets.cifar10.load_data()` returns."""
        cifar10 = datasets.cifar10
        return cifar10.load_data()
class StructuredDataClassifierExperiment(experiment.Experiment):
    """Experiment whose search is driven by `ak.StructuredDataClassifier`."""

    def get_auto_model(self):
        """Create the classifier: 10 trials, stored under `self.tmp_dir`."""
        return ak.StructuredDataClassifier(
            max_trials=10, directory=self.tmp_dir, overwrite=True
        )


class Titanic(StructuredDataClassifierExperiment):
    """Classification benchmark on the Titanic CSV files."""

    def __init__(self):
        super().__init__(name="Titanic")

    @staticmethod
    def load_data():
        """Download the train/eval CSV files.

        Returns:
            `(train_csv_path, "survived"), (eval_csv_path, "survived")`: each
            split is a local file path paired with the label column name.
        """
        TRAIN_DATA_URL = (
            "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
        )
        TEST_DATA_URL = (
            "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
        )
        x_train = keras.utils.get_file("titanic_train.csv", TRAIN_DATA_URL)
        x_test = keras.utils.get_file("titanic_eval.csv", TEST_DATA_URL)
        return (x_train, "survived"), (x_test, "survived")


class Iris(StructuredDataClassifierExperiment):
    """Classification benchmark on the Iris CSV files."""

    def __init__(self):
        super().__init__(name="Iris")

    @staticmethod
    def load_data():
        """Download the train/test CSV files.

        Returns:
            `(train_csv_path, "virginica"), (test_csv_path, "virginica")`:
            each split is a local file path paired with the label column
            name.
        """
        # Prepare the dataset.
        TRAIN_DATA_URL = (
            "https://storage.googleapis.com/"
            "download.tensorflow.org/data/iris_training.csv"
        )
        x_train = keras.utils.get_file("iris_train.csv", TRAIN_DATA_URL)

        TEST_DATA_URL = (
            "https://storage.googleapis.com/"
            "download.tensorflow.org/data/iris_test.csv"
        )
        x_test = keras.utils.get_file("iris_test.csv", TEST_DATA_URL)

        return (x_train, "virginica"), (x_test, "virginica")


class Wine(StructuredDataClassifierExperiment):
    """Classification benchmark on the UCI Wine data file."""

    def __init__(self):
        super().__init__(name="Wine")

    @staticmethod
    def load_data():
        """Download the data, shuffle it deterministically and split 80/20.

        Returns:
            `(x_train, y_train), (x_test, y_test)` as pandas objects, where
            column 0 of the file is the label and the rest are features.
        """
        DATASET_URL = (
            "https://archive.ics.uci.edu/ml/"
            "machine-learning-databases/wine/wine.data"
        )

        # save data
        dataset = keras.utils.get_file("wine.csv", DATASET_URL)

        # The file has no header row; shuffle with a fixed seed so every run
        # uses the same split.
        data = pd.read_csv(dataset, header=None).sample(frac=1, random_state=5)
        # 142 rows if the file has the expected 178 samples - TODO confirm.
        split_length = int(data.shape[0] * 0.8)

        return (data.iloc[:split_length, 1:], data.iloc[:split_length, 0]), (
            data.iloc[split_length:, 1:],
            data.iloc[split_length:, 0],
        )


class StructuredDataRegressorExperiment(experiment.Experiment):
    """Experiment whose search is driven by `ak.StructuredDataRegressor`."""

    def get_auto_model(self):
        """Create the regressor: 10 trials, stored under `self.tmp_dir`."""
        return ak.StructuredDataRegressor(
            max_trials=10, directory=self.tmp_dir, overwrite=True
        )


class CaliforniaHousing(StructuredDataRegressorExperiment):
    """Regression benchmark on scikit-learn's California housing data."""

    def __init__(self):
        # The base Experiment constructor requires a name; without this
        # override `CaliforniaHousing()` fails with a TypeError.
        super().__init__(name="CaliforniaHousing")

    @staticmethod
    def load_data():
        """Fetch the dataset and split it 80/20 with a fixed random state.

        Returns:
            `(x_train, y_train), (x_test, y_test)` as returned by
            `train_test_split`.
        """
        # Import the submodules explicitly: a bare `import sklearn` is not
        # guaranteed to expose them as attributes of the package.
        from sklearn import datasets as sk_datasets
        from sklearn import model_selection

        house_dataset = sk_datasets.fetch_california_housing()
        (
            x_train,
            x_test,
            y_train,
            y_test,
        ) = model_selection.train_test_split(
            house_dataset.data,
            np.array(house_dataset.target),
            test_size=0.2,
            random_state=42,
        )
        return (x_train, y_train), (x_test, y_test)
class IMDB(experiment.Experiment):
    """Text classification benchmark on the raw IMDB review archive."""

    def __init__(self):
        super().__init__(name="IMDB")

    def get_auto_model(self):
        """Create the text classifier: 10 trials under `self.tmp_dir`."""
        return ak.TextClassifier(
            max_trials=10, directory=self.tmp_dir, overwrite=True
        )

    @staticmethod
    def load_data():
        """Download the archive and load the "pos"/"neg" reviews.

        Returns:
            `(x_train, y_train), (x_test, y_test)` as numpy arrays built from
            the `data` and `target` fields returned by `load_files`.
        """
        archive_path = keras.utils.get_file(
            fname="aclImdb.tar.gz",
            origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",  # noqa: E501
            extract=True,
        )

        # The reviews are looked up in "aclImdb" next to the downloaded file.
        data_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
        label_names = ["pos", "neg"]

        splits = []
        # Only the training split is shuffled.
        for folder, do_shuffle in (("train", True), ("test", False)):
            bunch = load_files(
                os.path.join(data_root, folder),
                shuffle=do_shuffle,
                categories=label_names,
            )
            splits.append((np.array(bunch.data), np.array(bunch.target)))

        train_split, test_split = splits
        return train_split, test_split
def imdb_raw(num_instances=100):
    """Download the IMDB archive and load the "pos"/"neg" reviews.

    Args:
        num_instances: Int or None. When not None, every returned array is
            truncated to its first `num_instances` entries.

    Returns:
        `(x_train, y_train), (x_test, y_test)` as numpy arrays.
    """
    archive_path = keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True,
    )

    # The reviews are looked up in "aclImdb" next to the downloaded file.
    data_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
    label_names = ["pos", "neg"]

    # Only the training split is shuffled.
    train_bunch = load_files(
        os.path.join(data_root, "train"), shuffle=True, categories=label_names
    )
    test_bunch = load_files(
        os.path.join(data_root, "test"), shuffle=False, categories=label_names
    )

    arrays = [
        np.array(train_bunch.data),
        np.array(train_bunch.target),
        np.array(test_bunch.data),
        np.array(test_bunch.target),
    ]
    if num_instances is not None:
        arrays = [values[:num_instances] for values in arrays]

    x_train, y_train, x_test, y_test = arrays
    return (x_train, y_train), (x_test, y_test)


def test_mnist_accuracy_over_98(tmp_path):
    """A 1-trial, 10-epoch ImageClassifier must reach 0.98 on MNIST."""
    train_split, test_split = mnist.load_data()
    classifier = ak.ImageClassifier(max_trials=1, directory=tmp_path)
    classifier.fit(train_split[0], train_split[1], epochs=10)
    scores = classifier.evaluate(test_split[0], test_split[1])
    assert scores[1] >= 0.98


def test_cifar10_accuracy_over_93(tmp_path):
    """A 3-trial, 5-epoch ImageClassifier must reach 0.93 on CIFAR10."""
    train_split, test_split = cifar10.load_data()
    classifier = ak.ImageClassifier(max_trials=3, directory=tmp_path)
    classifier.fit(train_split[0], train_split[1], epochs=5)
    scores = classifier.evaluate(test_split[0], test_split[1])
    assert scores[1] >= 0.93


def test_imdb_accuracy_over_92(tmp_path):
    """A 3-trial, 1-epoch TextClassifier must reach 0.92 on full IMDB."""
    train_split, test_split = imdb_raw(num_instances=None)
    classifier = ak.TextClassifier(max_trials=3, directory=tmp_path)
    classifier.fit(train_split[0], train_split[1], batch_size=6, epochs=1)
    scores = classifier.evaluate(test_split[0], test_split[1])
    assert scores[1] >= 0.92


def test_titaninc_accuracy_over_77(tmp_path):
    """A 10-trial StructuredDataClassifier must reach 0.77 on Titanic."""
    TRAIN_DATA_URL = (
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
    )
    TEST_DATA_URL = (
        "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
    )
    label_column = "survived"

    train_csv = keras.utils.get_file("train.csv", TRAIN_DATA_URL)
    eval_csv = keras.utils.get_file("eval.csv", TEST_DATA_URL)

    classifier = ak.StructuredDataClassifier(max_trials=10, directory=tmp_path)
    classifier.fit(train_csv, label_column)
    scores = classifier.evaluate(eval_csv, label_column)
    assert scores[1] >= 0.77
def generate_report(experiments):
    """Run every experiment 10 times and summarize the results as CSV text.

    Args:
        experiments: Iterable of objects exposing `name` and
            `run(repeat_times=...)`, where `run` returns the pair
            `(total_times, metric_values)`.

    Returns:
        A string with one header line followed by one line per experiment:
        name, mean time, mean metric, and sample standard deviation of the
        metric, joined by commas.
    """
    report = [
        ",".join(
            [
                "dataset_name",
                "average_time",
                "metrics_average",
                "metrics_standard_deviation",
            ]
        )
    ]
    for experiment in experiments:
        total_times, metric_values = experiment.run(repeat_times=10)
        mean_time = statistics.mean(total_times)
        mean = statistics.mean(metric_values)
        std = statistics.stdev(metric_values)
        report.append(
            ",".join([experiment.name, str(mean_time), str(mean), str(std)])
        )
    return "\n".join(report)


def main(argv):
    """Run the benchmark for one task and write the report to a file.

    Args:
        argv: List of strings shaped like `sys.argv`: `argv[1]` is the task
            name and `argv[2]` is the path of the report file to write.
    """
    # Read the arguments from the parameter, not from the global sys.argv,
    # so the function honors what the caller passes in.
    task = argv[1]
    path = argv[2]
    report = generate_report(exp_module.get_experiments(task))
    with open(path, "w") as file:
        file.write(report)


if __name__ == "__main__":
    main(sys.argv)
bash: build $(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras bash ipython: build $(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data --env autokeras ipython notebook: build $(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data --net=host --env autokeras test: build $(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data --env autokeras py.test $(TEST) ================================================ FILE: docker/README.md ================================================ # Using Autokeras via Docker This directory contains a `Dockerfile` to make it easy to get up and running with Autokeras via [Docker](http://www.docker.com/). ## Installing Docker General installation instructions are [on the Docker site](https://docs.docker.com/installation/), but we give some quick links here: * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) * [ubuntu](https://docs.docker.com/installation/ubuntulinux/) ## Running the container We use a `Makefile` to wrap the docker commands in make targets. Build the container and start a Jupyter Notebook $ make notebook Build the container and start an IPython shell $ make ipython Build the container and start a bash shell $ make bash For GPU support install NVIDIA drivers (ideally latest) and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using $ make notebook GPUS=all # or [ipython, bash] Mount a volume for external data sets $ make DATA=~/mydata Print all make tasks $ make help Note: If you have a problem running nvidia-docker, you may try the old approach shown below, but it is not recommended. If you find a bug in nvidia-docker, please report it to that project and try using nvidia-docker as described above.
def check_bash_call(string):
    """Run `string` through `bash -c`.

    Raises:
        CalledProcessError: If the command exits with a non-zero status.
    """
    check_call(["bash", "-c", string])


def _run_format_and_flake8():
    """Lint the code; if linting fails, run the formatter and exit with 1.

    Raises:
        CalledProcessError: If the formatter itself fails.
        SystemExit: With code 1 when the formatter had to be run.
    """
    files_changed = False

    try:
        check_bash_call("sh shell/lint.sh")
    except CalledProcessError:
        # Linting failed: run the formatter, then report that files changed
        # so the commit is retried.
        check_bash_call("sh shell/format.sh")
        files_changed = True

    if files_changed:
        print("Some files have changed.")
        print("Please do git add and git commit again")
    else:
        print("No formatting needed.")

    if files_changed:
        # Raise SystemExit directly: the `exit` builtin is injected by the
        # `site` module for interactive use and may be missing.
        raise SystemExit(1)


def run_format_and_flake8():
    """Entry point: run the checks and exit with the failing return code."""
    try:
        _run_format_and_flake8()
    except CalledProcessError as error:
        print("Pre-commit returned exit code", error.returncode)
        raise SystemExit(error.returncode)


if __name__ == "__main__":
    run_format_and_flake8()
def py_to_nb_md(dest_dir):
    """Convert every tutorial script in `py/` to a notebook and a page.

    For each `.py` file, `tutobooks.py_to_md` writes `ipynb/<name>.ipynb`
    and `<dest_dir>/tutorial/<name>.md`; the Markdown file is then rewritten
    with "View in Colab" / "GitHub source" links prepended. The `py/`,
    `ipynb/` and `templates/img` paths are relative to the current working
    directory.

    Args:
        dest_dir: Path-like. Root directory of the generated doc sources.
    """
    dir_path = "py"
    for file_path in os.listdir("py/"):
        file_name = file_path
        py_path = os.path.join(dir_path, file_path)
        file_name_no_ext = os.path.splitext(file_name)[0]
        ext = os.path.splitext(file_name)[1]

        # Skip anything that is not a tutorial script.
        if ext != ".py":
            continue

        nb_path = os.path.join("ipynb", file_name_no_ext + ".ipynb")
        md_path = os.path.join(dest_dir, "tutorial", file_name_no_ext + ".md")

        tutobooks.py_to_md(py_path, nb_path, md_path, "templates/img")

        github_repo_dir = "keras-team/autokeras/blob/master/docs/"
        with open(md_path, "r") as md_file:
            button_lines = [
                ":material-link: "
                "[**View in Colab**](https://colab.research.google.com/github/"
                + github_repo_dir
                + "ipynb/"
                + file_name_no_ext
                + ".ipynb"
                + ")    "
                # + ''
                + ":octicons-mark-github-16: "
                "[**GitHub source**](https://github.com/"
                + github_repo_dir
                + "py/"
                + file_name_no_ext
                + ".py)",
                "\n",
            ]
            md_content = "".join(button_lines) + "\n" + md_file.read()

        with open(md_path, "w") as md_file:
            md_file.write(md_content)


def generate(dest_dir):
    """Generate the documentation sources into `dest_dir`.

    Runs the API doc generator, builds `index.md` from the index template
    and the README (from its first "##" onward), copies the contributing
    guide, and converts the tutorials.

    Args:
        dest_dir: `pathlib.Path`. Directory receiving the generated files.
    """
    template_dir = autokeras_dir / "docs" / "templates"
    doc_generator = keras_autodoc.DocumentationGenerator(
        PAGES,
        "https://github.com/keras-team/autokeras/blob/master",
        template_dir,
        autokeras_dir / "examples",
        extra_aliases=aliases_needed,
    )
    doc_generator.generate(dest_dir)

    # Read as UTF-8 to match the encoding index.md is written with below;
    # the default would be the platform locale encoding.
    readme = (autokeras_dir / "README.md").read_text(encoding="utf-8")
    index = (template_dir / "index.md").read_text(encoding="utf-8")
    index = index.replace("{{autogenerated}}", readme[readme.find("##") :])
    (dest_dir / "index.md").write_text(index, encoding="utf-8")

    shutil.copyfile(
        autokeras_dir / ".github" / "CONTRIBUTING.md",
        dest_dir / "contributing.md",
    )

    py_to_nb_md(dest_dir)


if __name__ == "__main__":
    generate(autokeras_dir / "docs" / "sources")
implement your own block\n", "as search space. This API is mainly for advanced users who already know what\n", "their model should look like.\n", "\n", "## Customized Search Space\n", "First, let us see how we can build the following neural network using the\n", "building blocks in AutoKeras.\n", "\n", "
\n", "graph LR\n", " id1(ImageInput) --> id2(Normalization)\n", " id2 --> id3(Image Augmentation)\n", " id3 --> id4(Convolutional)\n", " id3 --> id5(ResNet V2)\n", " id4 --> id6(Merge)\n", " id5 --> id6\n", " id6 --> id7(Classification Head)\n", "
\n", "\n", "We can make use of the [AutoModel](/auto_model/#automodel-class) API in\n", "AutoKeras to implement it as follows.\n", "The usage is the same as the [Keras functional\n", "API](https://keras.io/api/models/model/#with-the-functional-api).\n", "Since this is just a demo, we use small values for `max_trials` and `epochs`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.ImageInput()\n", "output_node = ak.Normalization()(input_node)\n", "output_node1 = ak.ConvBlock()(output_node)\n", "output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n", "output_node = ak.Merge()([output_node1, output_node2])\n", "output_node = ak.ClassificationHead()(output_node)\n", "\n", "auto_model = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "While building the model, the blocks used need to follow this topology:\n", "`Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation`\n", "are `Preprocessor`s.\n", "`ClassificationHead` is a `Head`. The rest are `Block`s.\n", "\n", "In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version\n", "of ResNet to use. There are many other arguments to specify for each building\n", "block. For most of the arguments, if not specified, they would be tuned\n", "automatically. 
Please refer to the documentation links at the bottom of the\n", "page for more details.\n", "\n", "Then, we prepare some data to run the model.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "print(x_train.shape) # (60000, 28, 28)\n", "print(y_train.shape) # (60000,)\n", "print(y_train[:3]) # array([7, 2, 1], dtype=uint8)\n", "\n", "# Feed the AutoModel with training data.\n", "auto_model.fit(x_train[:100], y_train[:100], epochs=1)\n", "# Predict with the best model.\n", "predicted_y = auto_model.predict(x_test)\n", "# Evaluate the best model with testing data.\n", "print(auto_model.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "For multiple input nodes and multiple heads search space, you can refer to\n", "[this section](/tutorial/multi/#customized-search-space).\n", "\n", "## Validation Data\n", "If you would like to provide your own validation data or change the ratio of\n", "the validation data, please refer to the Validation Data section of the\n", "tutorials of [Image\n", "Classification](/tutorial/image_classification/#validation-data), [Text\n", "Classification](/tutorial/text_classification/#validation-data), [Structured\n", "Data\n", "Classification](/tutorial/structured_data_classification/#validation-data),\n", "[Multi-task and Multiple Validation](/tutorial/multi/#validation-data).\n", "\n", "## Implement New Block\n", "\n", "You can extend the [Block](/base/#block-class)\n", "class to implement your own building blocks and use it with\n", "[AutoModel](/auto_model/#automodel-class).\n", "\n", "The first step is to learn how to write a build function for\n", "[KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics). You\n", "need to override the [build function](/base/#build-method) of the block. 
The\n", "following example shows how to implement a single Dense layer block whose\n", "number of neurons is tunable.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "class SingleDenseLayerBlock(ak.Block):\n", " def build(self, hp, inputs=None):\n", " # Get the input_node from inputs.\n", " input_node = tree.flatten(inputs)[0]\n", " layer = keras.layers.Dense(\n", " hp.Int(\"num_units\", min_value=32, max_value=512, step=32)\n", " )\n", " output_node = layer(input_node)\n", " return output_node" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can connect it with other blocks and build it into an\n", "[AutoModel](/auto_model/#automodel-class).\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Build the AutoModel\n", "input_node = ak.Input()\n", "output_node = SingleDenseLayerBlock()(input_node)\n", "output_node = ak.RegressionHead()(output_node)\n", "auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1)\n", "# Prepare Data\n", "num_instances = 100\n", "x_train = np.random.rand(num_instances, 20).astype(np.float32)\n", "y_train = np.random.rand(num_instances, 1).astype(np.float32)\n", "x_test = np.random.rand(num_instances, 20).astype(np.float32)\n", "y_test = np.random.rand(num_instances, 1).astype(np.float32)\n", "# Train the model\n", "auto_model.fit(x_train, y_train, epochs=1)\n", "print(auto_model.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "\n", "[AutoModel](/auto_model/#automodel-class)\n", "\n", "**Nodes**:\n", "[ImageInput](/node/#imageinput-class),\n", "[Input](/node/#input-class),\n", "[TextInput](/node/#textinput-class).\n", "[StructuredDataInput](/node/#structureddatainput-class),\n", "\n", "**Preprocessors**:\n", 
"[FeatureEngineering](/block/#featureengineering-class),\n", "[ImageAugmentation](/block/#imageaugmentation-class),\n", "[LightGBM](/block/#lightgbm-class),\n", "[Normalization](/block/#normalization-class).\n", "\n", "**Blocks**:\n", "[ConvBlock](/block/#convblock-class),\n", "[DenseBlock](/block/#denseblock-class),\n", "[Embedding](/block/#embedding-class),\n", "[Merge](/block/#merge-class),\n", "[ResNetBlock](/block/#resnetblock-class),\n", "[RNNBlock](/block/#rnnblock-class),\n", "[SpatialReduction](/block/#spatialreduction-class),\n", "[TemporalReduction](/block/#temporalreduction-class),\n", "[XceptionBlock](/block/#xceptionblock-class),\n", "[ImageBlock](/block/#imageblock-class),\n", "[TextBlock](/block/#textblock-class),\n", "[StructuredDataBlock](/block/#structureddatablock-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "customized", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/export.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import numpy as np\n", "from keras.datasets import mnist\n", "from keras.models import load_model\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can easily export the best 
model found by AutoKeras as a Keras\n", "Model.\n", "\n", "The following uses [ImageClassifier](/image_classifier) as an example.\n", "All the tasks and the [AutoModel](/auto_model/#automodel-class) have this\n", "[export_model](/auto_model/#export_model-method) function.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "\n", "# Initialize the image classifier.\n", "clf = ak.ImageClassifier(\n", " overwrite=True, max_trials=1\n", ") # Try only 1 model.(Increase accordingly)\n", "# Feed the image classifier with training data.\n", "clf.fit(x_train, y_train, epochs=1) # Change no of epochs to improve the model\n", "# Export as a Keras Model.\n", "model = clf.export_model()\n", "\n", "print(type(model)) # \n", "\n", "model.save(\"model_autokeras.keras\")\n", "\n", "\n", "loaded_model = load_model(\n", " \"model_autokeras.keras\", custom_objects=ak.CUSTOM_OBJECTS\n", ")\n", "\n", "predicted_y = loaded_model.predict(np.expand_dims(x_test, -1))\n", "print(predicted_y)" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "export", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/image_classification.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, 
"metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from keras.datasets import mnist\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## A Simple Example\n", "The first step is to prepare your data. Here we use the MNIST dataset as an\n", "example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "x_train = x_train[:100]\n", "y_train = y_train[:100]\n", "x_test = x_test[:100]\n", "y_test = y_test[:100]\n", "print(x_train.shape) # (100, 28, 28)\n", "print(y_train.shape) # (100,)\n", "print(y_train[:3]) # array([7, 2, 1], dtype=uint8)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the ImageClassifier.\n", "It is recommended to have more trials for more complicated datasets.\n", "This is just a quick demo of MNIST, so we set max_trials to 1.\n", "For the same reason, we set epochs to 1.\n", "You can also leave the epochs unspecified for an adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the image classifier.\n", "clf = ak.ImageClassifier(overwrite=True, max_trials=1)\n", "# Feed the image classifier with training data.\n", "clf.fit(x_train, y_train, epochs=1)\n", "\n", "\n", "# Predict with the best model.\n", "predicted_y = clf.predict(x_test)\n", "print(predicted_y)\n", "\n", "\n", "# Evaluate the best model with testing data.\n", "print(clf.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras uses the last 20% of training data as validation data.
As\n", "shown in the example below, you can use validation_split to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "clf.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=1,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set instead of splitting it from the\n", "training data with validation_data.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 50000\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "clf.fit(\n", " x_train,\n", " y_train,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " epochs=1,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Customized Search Space\n", "For advanced users, you may customize your search space by using AutoModel\n", "instead of ImageClassifier. You can configure the ImageBlock for some\n", "high-level configurations, e.g., block_type for the type of neural network to\n", "search, normalize for whether to do data normalization, augment for whether to\n", "do data augmentation. You can also omit these arguments, which would\n", "leave the different choices to be tuned automatically.
See the following\n", "example for detail.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.ImageInput()\n", "output_node = ak.ImageBlock(\n", " # Only search ResNet architectures.\n", " block_type=\"resnet\",\n", " # Normalize the dataset.\n", " normalize=True,\n", " # Do not do data augmentation.\n", " augment=False,\n", ")(input_node)\n", "output_node = ak.ClassificationHead()(output_node)\n", "clf = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "clf.fit(x_train, y_train, epochs=1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The usage of AutoModel is similar to the functional API of Keras. Basically, you\n", "are building a graph, whose edges are blocks and the nodes are intermediate\n", "outputs of blocks. To add an edge from input_node to output_node, use\n", "output_node = ak.[some_block]([block_args])(input_node).\n", "\n", "You can also use more fine-grained blocks to customize the search space\n", "even further.
See the following example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.ImageInput()\n", "output_node = ak.Normalization()(input_node)\n", "output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n", "output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n", "output_node = ak.ClassificationHead()(output_node)\n", "clf = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "clf.fit(x_train, y_train, epochs=1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[ImageClassifier](/image_classifier),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[ImageBlock](/block/#imageblock-class),\n", "[Normalization](/block/#normalization-class),\n", "[ImageAugmentation](/block/#imageaugmentation-class),\n", "[ResNetBlock](/block/#resnetblock-class),\n", "[ImageInput](/node/#imageinput-class),\n", "[ClassificationHead](/block/#classificationhead-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "image_classification", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/image_regression.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" },
"outputs": [], "source": [ "from keras.datasets import mnist\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "To make this tutorial easy to follow, we just treat the MNIST dataset as a\n", "regression dataset. It means we will treat prediction targets of the MNIST dataset,\n", "which are integers ranging from 0 to 9 as numerical values, so that they can be\n", "directly used as the regression targets.\n", "\n", "## A Simple Example\n", "The first step is to prepare your data. Here we use the MNIST dataset as an\n", "example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "x_train = x_train[:100]\n", "y_train = y_train[:100]\n", "x_test = x_test[:100]\n", "y_test = y_test[:100]\n", "print(x_train.shape) # (100, 28, 28)\n", "print(y_train.shape) # (100,)\n", "print(y_train[:3]) # array([7, 2, 1], dtype=uint8)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the ImageRegressor. It is recommended to have more\n", "trials for more complicated datasets. This is just a quick demo of MNIST, so\n", "we set max_trials to 1. For the same reason, we set epochs to 1.
You can also\n", "leave the epochs unspecified for an adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the image regressor.\n", "reg = ak.ImageRegressor(overwrite=True, max_trials=1)\n", "# Feed the image regressor with training data.\n", "reg.fit(x_train, y_train, epochs=1)\n", "\n", "\n", "# Predict with the best model.\n", "predicted_y = reg.predict(x_test)\n", "print(predicted_y)\n", "\n", "\n", "# Evaluate the best model with testing data.\n", "print(reg.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras use the last 20% of training data as validation data. As\n", "shown in the example below, you can use validation_split to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "reg.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=1,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set instead of splitting it from the\n", "training data with validation_data.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 50000\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "reg.fit(\n", " x_train,\n", " y_train,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " epochs=2,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Customized Search Space\n", "For advanced users, you may customize your search space by using AutoModel\n", "instead of ImageRegressor. 
You can configure the ImageBlock for some high-level\n", "configurations, e.g., block_type for the type of neural network to search,\n", "normalize for whether to do data normalization, augment for whether to do data\n", "augmentation. You can also omit these arguments, which would leave\n", "the different choices to be tuned automatically. See the following example for\n", "detail.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.ImageInput()\n", "output_node = ak.ImageBlock(\n", " # Only search ResNet architectures.\n", " block_type=\"resnet\",\n", " # Do not normalize the dataset.\n", " normalize=False,\n", " # Do not do data augmentation.\n", " augment=False,\n", ")(input_node)\n", "output_node = ak.RegressionHead()(output_node)\n", "reg = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "reg.fit(x_train, y_train, epochs=1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The usage of AutoModel is similar to the functional API of Keras. Basically,\n", "you are building a graph, whose edges are blocks and the nodes are intermediate\n", "outputs of blocks. To add an edge from input_node to output_node, use\n", "output_node = ak.[some_block]([block_args])(input_node).\n", "\n", "You can also use more fine-grained blocks to customize the search space\n", "even further.
See the following example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.ImageInput()\n", "output_node = ak.Normalization()(input_node)\n", "output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n", "output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n", "output_node = ak.RegressionHead()(output_node)\n", "reg = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "reg.fit(x_train, y_train, epochs=1)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[ImageRegressor](/image_regressor),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[ImageBlock](/block/#imageblock-class),\n", "[Normalization](/block/#normalization-class),\n", "[ImageAugmentation](/block/#imageaugmentation-class),\n", "[ResNetBlock](/block/#resnetblock-class),\n", "[ImageInput](/node/#imageinput-class),\n", "[RegressionHead](/block/#regressionhead-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "image_regression", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/multi.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import
numpy as np\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "In this tutorial we are making use of the\n", "[AutoModel](/auto_model/#automodel-class)\n", " API to show how to handle multi-modal data and multi-task.\n", "\n", "## What is multi-modal?\n", "\n", "Multi-modal data means each data instance has multiple forms of information.\n", "For example, a photo can be saved as an image. Besides the image, it may also\n", "have when and where it was taken as its attributes, which can be represented as\n", "numerical data.\n", "\n", "## What is multi-task?\n", "\n", "By multi-task, we mean that we want to predict multiple targets with the same\n", "input features. For example, we not only want to classify an image according to\n", "its content, but we also want to regress its quality as a float number between\n", "0 and 1.\n", "\n", "The following diagram shows an example of a multi-modal and multi-task neural\n", "network model.\n", "\n", "<div class=\"mermaid\">\n",
"graph TD\n", " id1(ImageInput) --> id3(Some Neural Network Model)\n", " id2(Input) --> id3\n", " id3 --> id4(ClassificationHead)\n", " id3 --> id5(RegressionHead)\n", "</div>\n",
"\n", "It has two inputs: the images and the numerical input data. Each image is\n", "associated with a set of attributes in the numerical input data. From these\n", "data, we are trying to predict the classification label and the regression value\n", "at the same time.\n", "\n", "## Data Preparation\n", "\n", "To illustrate our idea, we generate some random image and numerical data as\n", "the multi-modal data.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "num_instances = 10\n", "# Generate image data.\n", "image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n", "# Generate numerical data.\n", "numerical_data = np.random.rand(num_instances, 20).astype(np.float32)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "We also generate some multi-task targets for classification and regression.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Generate regression targets.\n", "regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n", "# Generate classification labels of five classes.\n", "classification_target = np.random.randint(5, size=num_instances)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Build and Train the Model\n", "Then we initialize the multi-modal and multi-task model with\n", "[AutoModel](/auto_model/#automodel-class).\n", "Since this is just a demo, we use small values for `max_trials` and `epochs`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the AutoModel with multiple inputs and outputs.\n", "model = ak.AutoModel(\n", " inputs=[ak.ImageInput(), ak.Input()],\n", " outputs=[\n", " ak.RegressionHead(metrics=[\"mae\"]),\n", " ak.ClassificationHead(\n", " loss=\"categorical_crossentropy\", metrics=[\"accuracy\"]\n", "
),\n", " ],\n", " overwrite=True,\n", " max_trials=2,\n", ")\n", "# Fit the model with prepared data.\n", "model.fit(\n", " [image_data, numerical_data],\n", " [regression_target, classification_target],\n", " epochs=1,\n", " batch_size=3,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras use the last 20% of training data as validation data.\n", "As shown in the example below, you can use `validation_split` to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "model.fit(\n", " [image_data, numerical_data],\n", " [regression_target, classification_target],\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=1,\n", " batch_size=3,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set\n", "instead of splitting it from the training data with `validation_data`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 5\n", "\n", "image_val = image_data[split:]\n", "numerical_val = numerical_data[split:]\n", "regression_val = regression_target[split:]\n", "classification_val = classification_target[split:]\n", "\n", "image_data = image_data[:split]\n", "numerical_data = numerical_data[:split]\n", "regression_target = regression_target[:split]\n", "classification_target = classification_target[:split]\n", "\n", "model.fit(\n", " [image_data, numerical_data],\n", " [regression_target, classification_target],\n", " # Use your own validation set.\n", " validation_data=(\n", " [image_val, numerical_val],\n", " [regression_val, classification_val],\n", " ),\n", " epochs=1,\n", " batch_size=3,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## 
Customized Search Space\n", "You can customize your search space.\n", "The following figure shows the search space we want to define.\n", "\n", "
\n", "graph LR\n", " id1(ImageInput) --> id2(Normalization)\n", " id2 --> id3(Image Augmentation)\n", " id3 --> id4(Convolutional)\n", " id3 --> id5(ResNet V2)\n", " id4 --> id6(Merge)\n", " id5 --> id6\n", " id7(Input) --> id9(DenseBlock)\n", " id6 --> id10(Merge)\n", " id9 --> id10\n", " id10 --> id11(Classification Head)\n", " id10 --> id12(Regression Head)\n", "
\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node1 = ak.ImageInput()\n", "output_node = ak.Normalization()(input_node1)\n", "output_node = ak.ImageAugmentation()(output_node)\n", "output_node1 = ak.ConvBlock()(output_node)\n", "output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n", "output_node1 = ak.Merge()([output_node1, output_node2])\n", "\n", "input_node2 = ak.Input()\n", "output_node2 = ak.DenseBlock()(input_node2)\n", "\n", "output_node = ak.Merge()([output_node1, output_node2])\n", "output_node1 = ak.ClassificationHead()(output_node)\n", "output_node2 = ak.RegressionHead()(output_node)\n", "\n", "auto_model = ak.AutoModel(\n", " inputs=[input_node1, input_node2],\n", " outputs=[output_node1, output_node2],\n", " overwrite=True,\n", " max_trials=2,\n", ")\n", "\n", "image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n", "numerical_data = np.random.rand(num_instances, 20).astype(np.float32)\n", "regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n", "classification_target = np.random.randint(5, size=num_instances)\n", "\n", "auto_model.fit(\n", " [image_data, numerical_data],\n", " [classification_target, regression_target],\n", " batch_size=3,\n", " epochs=1,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[AutoModel](/auto_model/#automodel-class),\n", "[ImageInput](/node/#imageinput-class),\n", "[Input](/node/#input-class),\n", "[DenseBlock](/block/#denseblock-class),\n", "[RegressionHead](/block/#regressionhead-class),\n", "[ClassificationHead](/block/#classificationhead-class),\n", "[CategoricalToNumerical](/block/#categoricaltonumerical-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "multi", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": 
"python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/structured_data_classification.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import keras\n", "import pandas as pd\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## A Simple Example\n", "The first step is to prepare your data. Here we use the [Titanic\n", "dataset](https://www.kaggle.com/c/titanic) as an example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "TRAIN_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/train.csv\"\n", "TEST_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/eval.csv\"\n", "\n", "train_file_path = keras.utils.get_file(\"train.csv\", TRAIN_DATA_URL)\n", "test_file_path = keras.utils.get_file(\"eval.csv\", TEST_DATA_URL)\n", "\n", "# Load data into numpy arrays\n", "train_df = pd.read_csv(train_file_path)\n", "test_df = pd.read_csv(test_file_path)\n", "\n", "y_train = train_df[\"survived\"].values\n", "x_train = train_df.drop(\"survived\", axis=1).values\n", "\n", "y_test = test_df[\"survived\"].values\n", "x_test = test_df.drop(\"survived\", axis=1).values" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the\n", 
"[StructuredDataClassifier](/structured_data_classifier).\n", "As a quick demo, we set epochs to 10.\n", "You can also leave the epochs unspecified for an adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the structured data classifier.\n", "clf = ak.StructuredDataClassifier(\n", " overwrite=True, max_trials=3\n", ") # It tries 3 different models.\n", "# Feed the structured data classifier with training data.\n", "clf.fit(\n", " x_train,\n", " y_train,\n", " epochs=10,\n", ")\n", "# Predict with the best model.\n", "predicted_y = clf.predict(x_test)\n", "# Evaluate the best model with testing data.\n", "print(clf.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also specify the column names and types for the data as follows. The\n", "`column_names` is optional if the training data already have the column names,\n", "e.g. pandas.DataFrame, CSV file. Any column, whose type is not specified will\n", "be inferred from the training data.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the structured data classifier.\n", "clf = ak.StructuredDataClassifier(\n", " column_names=[\n", " \"sex\",\n", " \"age\",\n", " \"n_siblings_spouses\",\n", " \"parch\",\n", " \"fare\",\n", " \"class\",\n", " \"deck\",\n", " \"embark_town\",\n", " \"alone\",\n", " ],\n", " column_types={\"sex\": \"categorical\", \"fare\": \"numerical\"},\n", " max_trials=10, # It tries 10 different models.\n", " overwrite=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras use the last 20% of training data as validation data. 
As\n", "shown in the example below, you can use `validation_split` to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "clf.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=10,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set\n", "instead of splitting it from the training data with `validation_data`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 500\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "clf.fit(\n", " x_train,\n", " y_train,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " epochs=10,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[StructuredDataClassifier](/structured_data_classifier),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[StructuredDataBlock](/block/#structureddatablock-class),\n", "[DenseBlock](/block/#denseblock-class),\n", "[StructuredDataInput](/node/#structureddatainput-class),\n", "[ClassificationHead](/block/#classificationhead-class),\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "structured_data_classification", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: 
docs/ipynb/structured_data_regression.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "from sklearn.datasets import fetch_california_housing\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## A Simple Example\n", "The first step is to prepare your data. Here we use the [California housing\n", "dataset](\n", "https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset)\n", "as an example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "house_dataset = fetch_california_housing()\n", "train_size = int(house_dataset.data.shape[0] * 0.9)\n", "\n", "x_train = house_dataset.data[:train_size]\n", "y_train = house_dataset.target[:train_size]\n", "x_test = house_dataset.data[train_size:]\n", "y_test = house_dataset.target[train_size:]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the\n", "[StructuredDataRegressor](/structured_data_regressor).\n", "As a quick demo, we set epochs to 10.\n", "You can also leave the epochs unspecified for an adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the structured data regressor.\n", "reg = ak.StructuredDataRegressor(\n", " overwrite=True, max_trials=3\n", ") # It tries 3 different models.\n", "# Feed the structured data regressor with training data.\n", "reg.fit(\n", " x_train,\n", " y_train,\n", " epochs=10,\n", ")\n", "# Predict with the best model.\n", "predicted_y = reg.predict(x_test)\n", "# 
Evaluate the best model with testing data.\n", "print(reg.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also specify the column names and types for the data as follows. The\n", "`column_names` is optional if the training data already have the column names,\n", "e.g. pandas.DataFrame, CSV file. Any column, whose type is not specified will\n", "be inferred from the training data.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the structured data regressor.\n", "reg = ak.StructuredDataRegressor(\n", " column_names=[\n", " \"MedInc\",\n", " \"HouseAge\",\n", " \"AveRooms\",\n", " \"AveBedrms\",\n", " \"Population\",\n", " \"AveOccup\",\n", " \"Latitude\",\n", " \"Longitude\",\n", " ],\n", " column_types={\"MedInc\": \"numerical\", \"Latitude\": \"numerical\"},\n", " max_trials=10, # It tries 10 different models.\n", " overwrite=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras use the last 20% of training data as validation data. 
As\n", "shown in the example below, you can use `validation_split` to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "reg.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=10,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set\n", "instead of splitting it from the training data with `validation_data`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 500\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "reg.fit(\n", " x_train,\n", " y_train,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " epochs=10,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[StructuredDataRegressor](/structured_data_regressor),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[StructuredDataBlock](/block/#structureddatablock-class),\n", "[DenseBlock](/block/#denseblock-class),\n", "[StructuredDataInput](/node/#structureddatainput-class),\n", "[RegressionHead](/block/#regressionhead-class),\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "structured_data_regression", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: 
docs/ipynb/text_classification.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import os\n", "\n", "import keras\n", "import numpy as np\n", "from sklearn.datasets import load_files\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## A Simple Example\n", "The first step is to prepare your data. Here we use the [IMDB\n", "dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n", "as an example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "dataset = keras.utils.get_file(\n", " fname=\"aclImdb.tar.gz\",\n", " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n", " extract=True,\n", ")\n", "\n", "# set path to dataset\n", "IMDB_DATADIR = os.path.join(\n", " os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n", ")\n", "\n", "classes = [\"pos\", \"neg\"]\n", "train_data = load_files(\n", " os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n", ")\n", "test_data = load_files(\n", " os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n", ")\n", "\n", "x_train = np.array(train_data.data)[:100]\n", "y_train = np.array(train_data.target)[:100]\n", "x_test = np.array(test_data.data)[:100]\n", "y_test = np.array(test_data.target)[:100]\n", "\n", "print(x_train.shape) # (25000,)\n", "print(y_train.shape) # (25000, 1)\n", "print(x_train[0][:50]) # this film was just brilliant casting" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the [TextClassifier](/text_classifier). 
As a quick\n", "demo, we set epochs to 1. You can also leave the epochs unspecified for an\n", "adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the text classifier.\n", "clf = ak.TextClassifier(\n", " overwrite=True, max_trials=1\n", ") # It only tries 1 model as a quick demo.\n", "# Feed the text classifier with training data.\n", "clf.fit(x_train, y_train, epochs=1, batch_size=2)\n", "# Predict with the best model.\n", "predicted_y = clf.predict(x_test)\n", "# Evaluate the best model with testing data.\n", "print(clf.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras uses the last 20% of training data as validation data. As\n", "shown in the example below, you can use `validation_split` to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "clf.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", " epochs=1,\n", " batch_size=2,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set instead of splitting it from the\n", "training data with `validation_data`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 5\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "clf.fit(\n", " x_train,\n", " y_train,\n", " epochs=1,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " batch_size=2,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Customized Search Space\n", "For advanced users, you 
may customize your search space by using\n", "[AutoModel](/auto_model/#automodel-class) instead of\n", "[TextClassifier](/text_classifier). You can configure the\n", "[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n", "also omit these arguments, which would leave the different choices to\n", "be tuned automatically. See the following example for details.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.TextInput()\n", "output_node = ak.TextBlock()(input_node)\n", "output_node = ak.ClassificationHead()(output_node)\n", "clf = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "clf.fit(x_train, y_train, epochs=1, batch_size=2)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[TextClassifier](/text_classifier),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[ConvBlock](/block/#convblock-class),\n", "[TextInput](/node/#textinput-class),\n", "[ClassificationHead](/block/#classificationhead-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "text_classification", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/ipynb/text_regression.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "!export KERAS_BACKEND=\"torch\"\n", "!pip install autokeras" ] }, { 
"cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "import os\n", "\n", "import keras\n", "import numpy as np\n", "from sklearn.datasets import load_files\n", "\n", "import autokeras as ak" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "To make this tutorial easy to follow, we just treat the IMDB dataset as a\n", "regression dataset. It means we will treat prediction targets of the IMDB dataset,\n", "which are 0s and 1s as numerical values, so that they can be directly used as\n", "the regression targets.\n", "\n", "## A Simple Example\n", "The first step is to prepare your data. Here we use the [IMDB\n", "dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n", "as an example.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "dataset = keras.utils.get_file(\n", " fname=\"aclImdb.tar.gz\",\n", " origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n", " extract=True,\n", ")\n", "\n", "# set path to dataset\n", "IMDB_DATADIR = os.path.join(\n", " os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n", ")\n", "\n", "classes = [\"pos\", \"neg\"]\n", "train_data = load_files(\n", " os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n", ")\n", "test_data = load_files(\n", " os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n", ")\n", "\n", "x_train = np.array(train_data.data)[:100]\n", "y_train = np.array(train_data.target)[:100]\n", "x_test = np.array(test_data.data)[:100]\n", "y_test = np.array(test_data.target)[:100]\n", "\n", "print(x_train.shape) # (100,)\n", "print(y_train.shape) # (100,)\n", "print(x_train[0][:50]) # this film was just brilliant casting " ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "The second step is to run the [TextRegressor](/text_regressor). 
As a quick\n", "demo, we set epochs to 1. You can also leave the epochs unspecified for an\n", "adaptive number of epochs.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "# Initialize the text regressor.\n", "reg = ak.TextRegressor(\n", " overwrite=True, max_trials=1 # It only tries 1 model as a quick demo.\n", ")\n", "# Feed the text regressor with training data.\n", "reg.fit(x_train, y_train, epochs=1, batch_size=2)\n", "# Predict with the best model.\n", "predicted_y = reg.predict(x_test)\n", "# Evaluate the best model with testing data.\n", "print(reg.evaluate(x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Validation Data\n", "By default, AutoKeras uses the last 20% of training data as validation data. As\n", "shown in the example below, you can use `validation_split` to specify the\n", "percentage.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "reg.fit(\n", " x_train,\n", " y_train,\n", " # Split the training data and use the last 15% as validation data.\n", " validation_split=0.15,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "You can also use your own validation set instead of splitting it from the\n", "training data with `validation_data`.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "split = 5\n", "x_val = x_train[split:]\n", "y_val = y_train[split:]\n", "x_train = x_train[:split]\n", "y_train = y_train[:split]\n", "reg.fit(\n", " x_train,\n", " y_train,\n", " epochs=1,\n", " # Use your own validation set.\n", " validation_data=(x_val, y_val),\n", " batch_size=2,\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Customized Search Space\n", "For advanced users, you may customize your search space by using\n", 
"[AutoModel](/auto_model/#automodel-class) instead of\n", "[TextRegressor](/text_regressor). You can configure the\n", "[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n", "also omit these arguments, which would leave the different choices to\n", "be tuned automatically. See the following example for details.\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ "input_node = ak.TextInput()\n", "output_node = ak.TextBlock()(input_node)\n", "output_node = ak.RegressionHead()(output_node)\n", "reg = ak.AutoModel(\n", " inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n", ")\n", "reg.fit(x_train, y_train, epochs=1, batch_size=2)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text" }, "source": [ "## Reference\n", "[TextRegressor](/text_regressor),\n", "[AutoModel](/auto_model/#automodel-class),\n", "[TextBlock](/block/#textblock-class),\n", "[ConvBlock](/block/#convblock-class),\n", "[TextInput](/node/#textinput-class),\n", "[RegressionHead](/block/#regressionhead-class).\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "text_regression", "private_outputs": false, "provenance": [], "toc_visible": true }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: docs/keras_autodoc/__init__.py ================================================ from .autogen import DocumentationGenerator from .gathering_members import get_classes from .gathering_members import get_functions from .gathering_members import get_methods ================================================ 
FILE: docs/keras_autodoc/autogen.py ================================================ import pathlib import shutil from inspect import getdoc from inspect import isclass from typing import Dict from typing import List from typing import Union from typing import get_type_hints from . import utils from .docstring import process_docstring from .examples import copy_examples from .get_signatures import get_signature class DocumentationGenerator: """Generates the documentation. # Arguments pages: A dictionary. The keys are the files' paths, the values are lists of strings, functions /classes / methods names with dotted access to the object. For example, `pages = {'my_file.md': ['keras.layers.Dense']}` is valid. project_url: The url pointing to the module directory of your project on GitHub. This will be used to make a `[Sources]` link. template_dir: Where to put the markdown files which will be copied and filled in the destination directory. You should put files like `index.md` inside. If you want a markdown file to be filled with the docstring of a function, use the `{{autogenerated}}` tag inside, and then add the markdown file to the `pages` dictionary. example_dir: Where you store examples in your project. Usually standalone files with a markdown docstring at the top. Will be inserted in the docs. extra_aliases: When displaying type hints, it's possible that the full dotted path is displayed instead of alias. The aliases present in `pages` are used, but it may happen if you're using a third-party library. For example `tensorflow.python.ops.variables.Variable` is displayed instead of `tensorflow.Variable`. Here you have two solutions, either you provide the import keras-autodoc should follow: `extra_aliases=["tensorflow.Variable"]`, either you provide a mapping to use `extra_aliases={"tensorflow.python.ops.variables.Variable": "tf.Variable"}`. 
The second option should be used if you want more control and that you don't want to respect the alias corresponding to the import (you can't do `import tf.Variable`). When giving a list, keras-autodoc will try to import the object from the string to understand what object you want to replace. max_signature_line_length: When displaying class and function signatures, keras-autodoc formats them using Black. This parameter controls the maximum line length of these signatures, and is passed directly through to Black. titles_size: `"#"` signs to put before a title in the generated markdown. """ def __init__( self, pages: Dict[str, list] = {}, project_url: Union[str, Dict[str, str]] = None, template_dir=None, examples_dir=None, extra_aliases: Union[List[str], Dict[str, str]] = None, max_signature_line_length: int = 110, titles_size="###", ): self.pages = pages self.project_url = project_url self.template_dir = template_dir self.examples_dir = examples_dir self.class_aliases = {} self._fill_aliases(extra_aliases) self.max_signature_line_length = max_signature_line_length self.titles_size = titles_size def generate(self, dest_dir): """Generate the docs. # Arguments dest_dir: Where to put the resulting markdown files. 
""" dest_dir = pathlib.Path(dest_dir) print("Cleaning up existing sources directory.") if dest_dir.exists(): shutil.rmtree(dest_dir) print("Populating sources directory with templates.") if self.template_dir: shutil.copytree(self.template_dir, dest_dir) for file_path, elements in self.pages.items(): markdown_text = "" for element in elements: markdown_text += self._render(element) utils.insert_in_file(markdown_text, dest_dir / file_path) if self.examples_dir is not None: copy_examples(self.examples_dir, dest_dir / "examples") def process_docstring(self, docstring, types: dict = None): """Can be overridden.""" processsed = process_docstring(docstring, types, self.class_aliases) return processsed def process_signature(self, signature): """Can be overridden.""" return signature def _render(self, element): if isinstance(element, str): object_ = utils.import_object(element) if utils.ismethod(object_): # we remove the modules when displaying the methods signature_override = ".".join(element.split(".")[-2:]) else: signature_override = element else: signature_override = None object_ = element return self._render_from_object(object_, signature_override) def _render_from_object(self, object_, signature_override: str): subblocks = [] if self.project_url is not None: subblocks.append(utils.make_source_link(object_, self.project_url)) signature = get_signature( object_, signature_override, self.max_signature_line_length ) signature = self.process_signature(signature) subblocks.append(f"{self.titles_size} {object_.__name__}\n") subblocks.append(utils.code_snippet(signature)) docstring = getdoc(object_) if docstring: if isclass(object_): type_hints = get_type_hints(object_.__init__) else: type_hints = get_type_hints(object_) docstring = self.process_docstring(docstring, type_hints) subblocks.append(docstring) return "\n\n".join(subblocks) + "\n\n----\n\n" def _fill_aliases(self, extra_aliases): for list_elements in self.pages.values(): for element_as_str in list_elements: element 
= utils.import_object(element_as_str) if not isclass(element): continue true_dotted_path = utils.get_dotted_path(element) self.class_aliases[true_dotted_path] = element_as_str if isinstance(extra_aliases, dict): self.class_aliases.update(extra_aliases) elif isinstance(extra_aliases, list): for alias in extra_aliases: full_dotted_path = utils.get_dotted_path( utils.import_object(alias) ) self.class_aliases[full_dotted_path] = alias ================================================ FILE: docs/keras_autodoc/docstring.py ================================================ import itertools import re from sphinx.util.typing import stringify_annotation from . import utils def get_code_blocks(docstring): code_blocks = {} tmp = docstring[:] while "```" in tmp: tmp = tmp[tmp.find("```") :] index = tmp[3:].find("```") + 6 snippet = tmp[:index] # Place marker in docstring for later reinjection. token = f"$KERAS_AUTODOC_CODE_BLOCK_{len(code_blocks)}" docstring = docstring.replace(snippet, token) code_blocks[token] = snippet tmp = tmp[index:] return code_blocks, docstring def get_section_end(docstring, section_start): regex_indented_sections_end = re.compile(r"\S\n+(\S|$)") end = re.search(regex_indented_sections_end, docstring[section_start:]) section_end = section_start + end.end() if section_end == len(docstring): return section_end else: return section_end - 2 def get_google_style_sections_without_code(docstring): regex_indented_sections_start = re.compile(r"\n# .+?\n") google_style_sections = {} for i in itertools.count(): match = re.search(regex_indented_sections_start, docstring) if match is None: break section_start = match.start() + 1 section_end = get_section_end(docstring, section_start) google_style_section = docstring[section_start:section_end] token = f"KERAS_AUTODOC_GOOGLE_STYLE_SECTION_{i}" google_style_sections[token] = google_style_section docstring = utils.insert_in_string( docstring, token, section_start, section_end ) return google_style_sections, docstring def 
get_google_style_sections(docstring): # First, extract code blocks and process them. # The parsing is easier if the #, : and other symbols aren't there. code_blocks, docstring = get_code_blocks(docstring) google_style_sections, docstring = get_google_style_sections_without_code( docstring ) docstring = reinject_strings(docstring, code_blocks) for section_token, section in google_style_sections.items(): google_style_sections[section_token] = reinject_strings( section, code_blocks ) return google_style_sections, docstring def to_markdown( google_style_section: str, types: dict = None, aliases=None ) -> str: end_first_line = google_style_section.find("\n") section_title = google_style_section[2:end_first_line] section_body = google_style_section[end_first_line + 1 :] section_body = utils.remove_indentation(section_body.strip()) # it's a list of elements, a special formatting is applied. if section_title == "Arguments": section_body = format_as_markdown_list(section_body, types, aliases) elif section_title in ("Attributes", "Raises"): section_body = format_as_markdown_list(section_body) if section_body: return f"__{section_title}__\n\n{section_body}\n" else: return f"__{section_title}__\n" def format_as_markdown_list( section_body, types: dict = None, aliases: dict = None ): section_body = re.sub(r"\n([^ ].*?):", r"\n- __\1__:", section_body) section_body = re.sub(r"^([^ ].*?):", r"- __\1__:", section_body) # Optionally add type annotations to docstring if types: for arg, arg_type in types.items(): type_hint_str = apply_aliases( stringify_annotation(arg_type), aliases ) section_body = re.sub( rf"(- __{arg}__)", rf"\1 `{type_hint_str}`", section_body ) return section_body def apply_aliases(string: str, aliases: dict): for dotted_path, alias in aliases.items(): string = string.replace(dotted_path, alias) return string def reinject_strings(target, strings_to_inject): for token, string_to_inject in strings_to_inject.items(): target = target.replace(token, string_to_inject) 
return target def process_docstring(docstring, types: dict = None, aliases=None): if docstring[-1] != "\n": docstring += "\n" google_style_sections, docstring = get_google_style_sections(docstring) for token, google_style_section in google_style_sections.items(): markdown_section = to_markdown(google_style_section, types, aliases) docstring = docstring.replace(token, markdown_section) return docstring ================================================ FILE: docs/keras_autodoc/examples.py ================================================ import os import pathlib def copy_examples(examples_dir, destination_dir): """Copy the examples directory in the documentation. Prettify files by extracting the docstrings written in Markdown. """ pathlib.Path(destination_dir).mkdir(exist_ok=True) for file in os.listdir(examples_dir): if not file.endswith(".py"): continue module_path = os.path.join(examples_dir, file) docstring, starting_line = get_module_docstring(module_path) destination_file = os.path.join(destination_dir, file[:-2] + "md") with open(destination_file, "w+", encoding="utf-8") as f_out, open( examples_dir / file, "r+", encoding="utf-8" ) as f_in: f_out.write(docstring + "\n\n") # skip docstring for _ in range(starting_line): next(f_in) f_out.write("```python\n") # next line might be empty. line = next(f_in) if line != "\n": f_out.write(line) # copy the rest of the file. for line in f_in: f_out.write(line) f_out.write("```") def get_module_docstring(filepath): """Extract the module docstring. Also finds the line at which the docstring ends. 
""" co = compile(open(filepath, encoding="utf-8").read(), filepath, "exec") if co.co_consts and isinstance(co.co_consts[0], str): docstring = co.co_consts[0] else: print("Could not get the docstring from " + filepath) docstring = "" return docstring, co.co_firstlineno ================================================ FILE: docs/keras_autodoc/gathering_members.py ================================================ import inspect from inspect import isclass from inspect import isfunction from inspect import isroutine from typing import List from .utils import import_object def get_classes(module, exclude: List[str] = None, return_strings: bool = True): """Get all the classes of a module. # Arguments module: The module to fetch the classes from. If it's a string, it should be in the dotted format. `'keras.layers'` for example. exclude: The names which will be excluded from the returned list. For example, `get_classes('keras.layers', exclude=['Dense', 'Conv2D'])`. return_strings: If False, the actual classes will be returned. Note that if you use aliases when building your docs, you should use strings. This is because the computed signature uses `__name__` and `__module__` if you don't provide a string as input. # Returns A list of strings or a list of classes. """ return _get_all_module_element(module, exclude, return_strings, True) def get_functions( module, exclude: List[str] = None, return_strings: bool = True ): """Get all the functions of a module. # Arguments module: The module to fetch the functions from. If it's a string, it should be in the dotted format. `'keras.backend'` for example. exclude: The names which will be excluded from the returned list. For example, `get_functions('keras.backend', exclude=['max'])`. return_strings: If False, the actual functions will be returned. Note that if you use aliases when building your docs, you should use strings. This is because the computed signature uses `__name__` and `__module__` if you don't provide a string as input. 
# Returns A list of strings or a list of functions. """ return _get_all_module_element(module, exclude, return_strings, False) def get_methods(cls, exclude=None, return_strings=True): """Get all the method of a class. # Arguments cls: The class to fetch the methods from. If it's a string, it should be in the dotted format. `'keras.layers.Dense'` for example. exclude: The names which will be excluded from the returned list. For example, `get_methods('keras.Model', exclude=['save'])`. return_strings: If False, the actual methods will be returned. Note that if you use aliases when building your docs, you should use strings. This is because the computed signature uses `__name__` and `__module__` if you don't provide a string as input. # Returns A list of strings or a list of methods. """ if isinstance(cls, str): cls_str = cls cls = import_object(cls) else: cls_str = f"{cls.__module__}.{cls.__name__}" exclude = exclude or [] methods = [] for _, method in inspect.getmembers(cls, predicate=isroutine): if method.__name__[0] == "_" or method.__name__ in exclude: continue if return_strings: methods.append(f"{cls_str}.{method.__name__}") else: methods.append(method) return methods def _get_all_module_element(module, exclude, return_strings, class_): if isinstance(module, str): module = import_object(module) exclude = exclude or [] module_data = [] for name in dir(module): module_member = getattr(module, name) if not (isfunction(module_member) or isclass(module_member)): continue if name[0] == "_" or name in exclude: continue if module.__name__ not in module_member.__module__: continue if module_member in module_data: continue if class_ and not isclass(module_member): continue if not class_ and not isfunction(module_member): continue if return_strings: module_data.append(f"{module.__name__}.{name}") else: module_data.append(module_member) module_data.sort(key=id) return module_data ================================================ FILE: docs/keras_autodoc/get_signatures.py 
================================================ import inspect import warnings import black from sphinx.util.inspect import signature from sphinx.util.inspect import stringify_signature from . import utils def get_signature_start(function): """For the Dense layer, it should return the string 'keras.layers.Dense'""" if utils.ismethod(function): prefix = f"{utils.get_class_from_method(function).__name__}." else: try: prefix = f"{function.__module__}." except AttributeError: warnings.warn( f"function {function} has no module. " f"It will not be included in the signature." ) prefix = "" return f"{prefix}{function.__name__}" def get_signature_end(function): sig = signature(function) signature_end = stringify_signature(sig, show_annotation=False) if utils.ismethod(function): signature_end = signature_end.replace("(self, ", "(") signature_end = signature_end.replace("(self)", "()") return signature_end def get_function_signature(function, override=None, max_line_length: int = 110): if override is None: signature_start = get_signature_start(function) else: signature_start = override signature_end = get_signature_end(function) return format_signature(signature_start, signature_end, max_line_length) def get_class_signature(cls, override=None, max_line_length: int = 110): if override is None: signature_start = f"{cls.__module__}.{cls.__name__}" else: signature_start = override signature_end = get_signature_end(cls.__init__) return format_signature(signature_start, signature_end, max_line_length) def get_signature(object_, override=None, max_line_length: int = 110): if inspect.isclass(object_): return get_class_signature(object_, override, max_line_length) else: return get_function_signature(object_, override, max_line_length) def format_signature( signature_start: str, signature_end: str, max_line_length: int = 110 ): """pretty formatting to avoid long signatures on one single line""" # first, we make it look like a real function declaration. 
fake_signature_start = "x" * len(signature_start) fake_signature = fake_signature_start + signature_end fake_python_code = f"def {fake_signature}:\n pass\n" # we format with black mode = black.FileMode(line_length=max_line_length) formatted_fake_python_code = black.format_str(fake_python_code, mode=mode) # we make the final, multiline signature new_signature_end = extract_signature_end(formatted_fake_python_code) return signature_start + new_signature_end def extract_signature_end(function_definition): start = function_definition.find("(") stop = function_definition.rfind(")") return function_definition[start : stop + 1] ================================================ FILE: docs/keras_autodoc/utils.py ================================================ import importlib import inspect import os import re def count_leading_spaces(s): ws = re.search(r"\S", s) if ws: return ws.start() else: return 0 def insert_in_file(markdown_text, file_path): """Save module page. Either insert content into existing page, or create page otherwise.""" if file_path.exists(): template = file_path.read_text(encoding="utf-8") if "{{autogenerated}}" not in template: raise RuntimeError( f"Template found for {file_path} but missing " f"{{autogenerated}} tag." 
) markdown_text = template.replace("{{autogenerated}}", markdown_text) print("...inserting autogenerated content into template:", file_path) else: print("...creating new page with autogenerated content:", file_path) os.makedirs(file_path.parent, exist_ok=True) file_path.write_text(markdown_text, encoding="utf-8") def code_snippet(snippet): return f"```python\n{snippet}\n```\n" def make_source_link(cls, project_url): if isinstance(project_url, dict): base_module = cls.__module__.split(".")[0] project_url = project_url[base_module] path = cls.__module__.replace(".", "/") line = inspect.getsourcelines(cls)[-1] return ( f'' f"[[source]]({project_url}/{path}.py#L{line})" f"" ) def format_classes_list(classes, page_name): for i in range(len(classes)): if not isinstance(classes[i], (list, tuple)): classes[i] = (classes[i], []) for class_, class_methods in classes: if not inspect.isclass(class_): # TODO: add a test for this raise TypeError( f"{class_} was given in the class list " f"of {page_name} but {class_} is not a Python class." ) return classes def get_class_from_method(meth): """See https://stackoverflow.com/questions/3589311/ get-defining-class-of-unbound-method-object-in-python-3/ 25959545#25959545 """ if inspect.ismethod(meth): for cls in inspect.getmro(meth.__self__.__class__): if cls.__dict__.get(meth.__name__) is meth: return cls meth = meth.__func__ # fallback to __qualname__ parsing if inspect.isfunction(meth): cls = getattr( inspect.getmodule(meth), meth.__qualname__.split(".", 1)[0].rsplit(".", 1)[0], ) if isinstance(cls, type): return cls return getattr( meth, "__objclass__", None ) # handle special descriptor objects def ismethod(function): return get_class_from_method(function) is not None def import_object(string: str): """Import an object from a string. The object can be a function, class or method. For example: `'keras.layers.Dense.get_weights'` is valid. 
""" last_object_got = None seen_names = [] for name in string.split("."): seen_names.append(name) try: last_object_got = importlib.import_module(".".join(seen_names)) except ModuleNotFoundError: last_object_got = getattr(last_object_got, name) return last_object_got def get_type(object_) -> str: if inspect.isclass(object_): return "class" elif ismethod(object_): return "method" elif inspect.isfunction(object_): return "function" else: raise TypeError( f"{object_} is detected as neither a class, a method nor" f"a function." ) def insert_in_string(target, string_to_insert, start, end): target_start_cut = target[:start] target_end_cut = target[end:] return target_start_cut + string_to_insert + target_end_cut def remove_indentation(string): string = string.replace("\n ", "\n") if string[:4] == " ": string = string[4:] return string def get_dotted_path(class_): return f"{class_.__module__}.{class_.__qualname__}" ================================================ FILE: docs/mkdocs.yml ================================================ site_name: AutoKeras theme: favicon: '/img/favicon.png' logo: '/img/logo_white.svg' name: 'material' docs_dir: sources repo_url: https://github.com/keras-team/autokeras site_url: http://autokeras.com edit_uri: "" site_description: 'Documentation for AutoKeras.' 
markdown_extensions: - codehilite - pymdownx.superfences: custom_fences: - name: mermaid class: mermaid format: !!python/name:pymdownx.superfences.fence_div_format - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg - admonition extra: analytics: provider: google property: G-GTF9QP8DFD extra_css: - stylesheets/extra.css extra_javascript: - https://unpkg.com/mermaid@8.4.4/dist/mermaid.min.js nav: - Home: index.md - Installation: install.md - Tutorials: - Overview: tutorial/overview.md - Image Classification: tutorial/image_classification.md - Image Regression: tutorial/image_regression.md - Text Classification: tutorial/text_classification.md - Text Regression: tutorial/text_regression.md - Structured Data Classification: tutorial/structured_data_classification.md - Structured Data Regression: tutorial/structured_data_regression.md - Multi-Modal and Multi-Task: tutorial/multi.md - Customized Model: tutorial/customized.md - Export Model: tutorial/export.md - Load Data from Disk: tutorial/load.md - FAQ: tutorial/faq.md - Contributing Guide: contributing.md - Documentation: - ImageClassifier: image_classifier.md - ImageRegressor: image_regressor.md - TextClassifier: text_classifier.md - TextRegressor: text_regressor.md - StructuredDataClassifier: structured_data_classifier.md - StructuredDataRegressor: structured_data_regressor.md - AutoModel: auto_model.md - Base Class: base.md - Node: node.md - Block: block.md - Utils: utils.md - About: about.md ================================================ FILE: docs/py/customized.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import keras import numpy as np import tree from keras.datasets import mnist import autokeras as ak """ In this tutorial, we show how to customize your search space with [AutoModel](/auto_model/#automodel-class) and how to implement your own block as 
search space. This API is mainly for advanced users who already know what their model should look like. ## Customized Search Space First, let us see how we can build the following neural network using the building blocks in AutoKeras.
graph LR id1(ImageInput) --> id2(Normalization) id2 --> id3(Image Augmentation) id3 --> id4(Convolutional) id3 --> id5(ResNet V2) id4 --> id6(Merge) id5 --> id6 id6 --> id7(Classification Head)
We can make use of the [AutoModel](/auto_model/#automodel-class) API in AutoKeras to implement it as follows. The usage is the same as the [Keras functional API](https://keras.io/api/models/model/#with-the-functional-api). Since this is just a demo, we use small values for `max_trials` and `epochs`. """ input_node = ak.ImageInput() output_node = ak.Normalization()(input_node) output_node1 = ak.ConvBlock()(output_node) output_node2 = ak.ResNetBlock(version="v2")(output_node) output_node = ak.Merge()([output_node1, output_node2]) output_node = ak.ClassificationHead()(output_node) auto_model = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) """ While building the model, the blocks used need to follow this topology: `Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation` are `Preprocessor`s. `ClassificationHead` is a `Head`. The rest are `Block`s. In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version of ResNet to use. There are many other arguments to specify for each building block. For most of the arguments, if not specified, they would be tuned automatically. Please refer to the documentation links at the bottom of the page for more details. Then, we prepare some data to run the model. """ (x_train, y_train), (x_test, y_test) = mnist.load_data() print(x_train.shape) # (60000, 28, 28) print(y_train.shape) # (60000,) print(y_train[:3]) # array([5, 0, 4], dtype=uint8) # Feed the AutoModel with training data. auto_model.fit(x_train[:100], y_train[:100], epochs=1) # Predict with the best model. predicted_y = auto_model.predict(x_test) # Evaluate the best model with testing data. print(auto_model.evaluate(x_test, y_test)) """ For multiple input nodes and multiple heads search space, you can refer to [this section](/tutorial/multi/#customized-search-space).
## Validation Data If you would like to provide your own validation data or change the ratio of the validation data, please refer to the Validation Data section of the tutorials of [Image Classification](/tutorial/image_classification/#validation-data), [Text Classification](/tutorial/text_classification/#validation-data), [Structured Data Classification](/tutorial/structured_data_classification/#validation-data), [Multi-task and Multiple Validation](/tutorial/multi/#validation-data). ## Implement New Block You can extend the [Block](/base/#block-class) class to implement your own building blocks and use it with [AutoModel](/auto_model/#automodel-class). The first step is to learn how to write a build function for [KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics). You need to override the [build function](/base/#build-method) of the block. The following example shows how to implement a single Dense layer block whose number of neurons is tunable. """ class SingleDenseLayerBlock(ak.Block): def build(self, hp, inputs=None): # Get the input_node from inputs. input_node = tree.flatten(inputs)[0] layer = keras.layers.Dense( hp.Int("num_units", min_value=32, max_value=512, step=32) ) output_node = layer(input_node) return output_node """ You can connect it with other blocks and build it into an [AutoModel](/auto_model/#automodel-class). 
""" # Build the AutoModel input_node = ak.Input() output_node = SingleDenseLayerBlock()(input_node) output_node = ak.RegressionHead()(output_node) auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1) # Prepare Data num_instances = 100 x_train = np.random.rand(num_instances, 20).astype(np.float32) y_train = np.random.rand(num_instances, 1).astype(np.float32) x_test = np.random.rand(num_instances, 20).astype(np.float32) y_test = np.random.rand(num_instances, 1).astype(np.float32) # Train the model auto_model.fit(x_train, y_train, epochs=1) print(auto_model.evaluate(x_test, y_test)) """ ## Reference [AutoModel](/auto_model/#automodel-class) **Nodes**: [ImageInput](/node/#imageinput-class), [Input](/node/#input-class), [TextInput](/node/#textinput-class). [StructuredDataInput](/node/#structureddatainput-class), **Preprocessors**: [FeatureEngineering](/block/#featureengineering-class), [ImageAugmentation](/block/#imageaugmentation-class), [LightGBM](/block/#lightgbm-class), [Normalization](/block/#normalization-class), **Blocks**: [ConvBlock](/block/#convblock-class), [DenseBlock](/block/#denseblock-class), [Embedding](/block/#embedding-class), [Merge](/block/#merge-class), [ResNetBlock](/block/#resnetblock-class), [RNNBlock](/block/#rnnblock-class), [SpatialReduction](/block/#spatialreduction-class), [TemporalReduction](/block/#temporalreduction-class), [XceptionBlock](/block/#xceptionblock-class), [ImageBlock](/block/#imageblock-class), [TextBlock](/block/#textblock-class). [StructuredDataBlock](/block/#structureddatablock-class), """ ================================================ FILE: docs/py/export.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import numpy as np from keras.datasets import mnist from keras.models import load_model import autokeras as ak """ You can easily export your model the best model found by AutoKeras as a Keras Model. 
The following example uses [ImageClassifier](/image_classifier) as an example. All the tasks and the [AutoModel](/auto_model/#automodel-class) have this [export_model](/auto_model/#export_model-method) function. """ (x_train, y_train), (x_test, y_test) = mnist.load_data() # Initialize the image classifier. clf = ak.ImageClassifier( overwrite=True, max_trials=1 ) # Try only 1 model. (Increase accordingly.) # Feed the image classifier with training data. clf.fit(x_train, y_train, epochs=1) # Change the number of epochs to improve the model. # Export as a Keras Model. model = clf.export_model() print(type(model)) # model.save("model_autokeras.keras") loaded_model = load_model( "model_autokeras.keras", custom_objects=ak.CUSTOM_OBJECTS ) predicted_y = loaded_model.predict(np.expand_dims(x_test, -1)) print(predicted_y) ================================================ FILE: docs/py/image_classification.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ from keras.datasets import mnist import autokeras as ak """ ## A Simple Example The first step is to prepare your data. Here we use the MNIST dataset as an example. """ (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train[:100] y_train = y_train[:100] x_test = x_test[:100] y_test = y_test[:100] print(x_train.shape) # (100, 28, 28) print(y_train.shape) # (100,) print(y_train[:3]) # array([5, 0, 4], dtype=uint8) """ The second step is to run the ImageClassifier. It is recommended to have more trials for more complicated datasets. This is just a quick demo of MNIST, so we set max_trials to 1. For the same reason, we set epochs to 1. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the image classifier. clf = ak.ImageClassifier(overwrite=True, max_trials=1) # Feed the image classifier with training data. clf.fit(x_train, y_train, epochs=1) # Predict with the best model.
predicted_y = clf.predict(x_test) print(predicted_y) # Evaluate the best model with testing data. print(clf.evaluate(x_test, y_test)) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use validation_split to specify the percentage. """ clf.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=1, ) """ You can also use your own validation set instead of splitting it from the training data with validation_data. """ split = 50000 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] clf.fit( x_train, y_train, # Use your own validation set. validation_data=(x_val, y_val), epochs=1, ) """ ## Customized Search Space For advanced users, you may customize your search space by using AutoModel instead of ImageClassifier. You can configure the ImageBlock for some high-level configurations, e.g., block_type for the type of neural network to search, normalize for whether to do data normalization, augment for whether to do data augmentation. You can also do not specify these arguments, which would leave the different choices to be tuned automatically. See the following example for detail. """ input_node = ak.ImageInput() output_node = ak.ImageBlock( # Only search ResNet architectures. block_type="resnet", # Normalize the dataset. normalize=True, # Do not do data augmentation. augment=False, )(input_node) output_node = ak.ClassificationHead()(output_node) clf = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) clf.fit(x_train, y_train, epochs=1) """ The usage of AutoModel is similar to the functional API of Keras. Basically, you are building a graph, whose edges are blocks and the nodes are intermediate outputs of blocks. To add an edge from input_node to output_node with output_node = ak.[some_block]([block_args])(input_node). 
You can also use more fine-grained blocks to customize the search space even further. See the following example. """ input_node = ak.ImageInput() output_node = ak.Normalization()(input_node) output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node) output_node = ak.ResNetBlock(version="v2")(output_node) output_node = ak.ClassificationHead()(output_node) clf = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) clf.fit(x_train, y_train, epochs=1) """ ## Reference [ImageClassifier](/image_classifier), [AutoModel](/auto_model/#automodel-class), [ImageBlock](/block/#imageblock-class), [Normalization](/block/#normalization-class), [ImageAugmentation](/block/#imageaugmentation-class), [ResNetBlock](/block/#resnetblock-class), [ImageInput](/node/#imageinput-class), [ClassificationHead](/block/#classificationhead-class). """ ================================================ FILE: docs/py/image_regression.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ from keras.datasets import mnist import autokeras as ak """ To make this tutorial easy to follow, we just treat the MNIST dataset as a regression dataset. It means we will treat the prediction targets of the MNIST dataset, which are integers ranging from 0 to 9, as numerical values, so that they can be directly used as the regression targets. ## A Simple Example The first step is to prepare your data. Here we use the MNIST dataset as an example. """ (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train[:100] y_train = y_train[:100] x_test = x_test[:100] y_test = y_test[:100] print(x_train.shape) # (100, 28, 28) print(y_train.shape) # (100,) print(y_train[:3]) # array([5, 0, 4], dtype=uint8) """ The second step is to run the ImageRegressor. It is recommended to have more trials for more complicated datasets. This is just a quick demo of MNIST, so we set max_trials to 1.
For the same reason, we set epochs to 1. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the image regressor. reg = ak.ImageRegressor(overwrite=True, max_trials=1) # Feed the image regressor with training data. reg.fit(x_train, y_train, epochs=1) # Predict with the best model. predicted_y = reg.predict(x_test) print(predicted_y) # Evaluate the best model with testing data. print(reg.evaluate(x_test, y_test)) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use validation_split to specify the percentage. """ reg.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=1, ) """ You can also use your own validation set instead of splitting it from the training data with validation_data. """ split = 50000 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] reg.fit( x_train, y_train, # Use your own validation set. validation_data=(x_val, y_val), epochs=2, ) """ ## Customized Search Space For advanced users, you may customize your search space by using AutoModel instead of ImageRegressor. You can configure the ImageBlock for some high-level configurations, e.g., block_type for the type of neural network to search, normalize for whether to do data normalization, augment for whether to do data augmentation. You can also do not specify these arguments, which would leave the different choices to be tuned automatically. See the following example for detail. """ input_node = ak.ImageInput() output_node = ak.ImageBlock( # Only search ResNet architectures. block_type="resnet", # Normalize the dataset. normalize=False, # Do not do data augmentation. 
augment=False, )(input_node) output_node = ak.RegressionHead()(output_node) reg = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) reg.fit(x_train, y_train, epochs=1) """ The usage of AutoModel is similar to the functional API of Keras. Basically, you are building a graph, whose edges are blocks and the nodes are intermediate outputs of blocks. To add an edge from input_node to output_node, use output_node = ak.[some_block]([block_args])(input_node). You can also use more fine-grained blocks to customize the search space even further. See the following example. """ input_node = ak.ImageInput() output_node = ak.Normalization()(input_node) output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node) output_node = ak.ResNetBlock(version="v2")(output_node) output_node = ak.RegressionHead()(output_node) reg = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) reg.fit(x_train, y_train, epochs=1) """ ## Reference [ImageRegressor](/image_regressor), [AutoModel](/auto_model/#automodel-class), [ImageBlock](/block/#imageblock-class), [Normalization](/block/#normalization-class), [ImageAugmentation](/block/#imageaugmentation-class), [ResNetBlock](/block/#resnetblock-class), [ImageInput](/node/#imageinput-class), [RegressionHead](/block/#regressionhead-class). """ ================================================ FILE: docs/py/multi.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import numpy as np import autokeras as ak """ In this tutorial we are making use of the [AutoModel](/auto_model/#automodel-class) API to show how to handle multi-modal data and multi-task. ## What is multi-modal? Multi-modal data means each data instance has multiple forms of information. For example, a photo can be saved as an image.
Besides the image, it may also have when and where it was taken as its attributes, which can be represented as numerical data. ## What is multi-task? By multi-task, we mean that we want to predict multiple targets with the same input features. For example, we not only want to classify an image according to its content, but we also want to regress its quality as a float number between 0 and 1. The following diagram shows an example of a multi-modal and multi-task neural network model.
graph TD id1(ImageInput) --> id3(Some Neural Network Model) id2(Input) --> id3 id3 --> id4(ClassificationHead) id3 --> id5(RegressionHead)
It has two inputs: the images and the numerical input data. Each image is associated with a set of attributes in the numerical input data. From these data, we are trying to predict the classification label and the regression value at the same time. ## Data Preparation To illustrate our idea, we generate some random image and numerical data as the multi-modal data. """ num_instances = 10 # Generate image data. image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32) # Generate numerical data. numerical_data = np.random.rand(num_instances, 20).astype(np.float32) """ We also generate some multi-task targets for classification and regression. """ # Generate regression targets. regression_target = np.random.rand(num_instances, 1).astype(np.float32) # Generate classification labels of five classes. classification_target = np.random.randint(5, size=num_instances) """ ## Build and Train the Model Then we initialize the multi-modal and multi-task model with [AutoModel](/auto_model/#automodel-class). Since this is just a demo, we use small values for `max_trials` and `epochs`. """ # Initialize the multi-modal, multi-task model with multiple inputs and outputs. model = ak.AutoModel( inputs=[ak.ImageInput(), ak.Input()], outputs=[ ak.RegressionHead(metrics=["mae"]), ak.ClassificationHead( loss="categorical_crossentropy", metrics=["accuracy"] ), ], overwrite=True, max_trials=2, ) # Fit the model with prepared data. model.fit( [image_data, numerical_data], [regression_target, classification_target], epochs=1, batch_size=3, ) """ ## Validation Data By default, AutoKeras uses the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """ model.fit( [image_data, numerical_data], [regression_target, classification_target], # Split the training data and use the last 15% as validation data.
validation_split=0.15, epochs=1, batch_size=3, ) """ You can also use your own validation set instead of splitting it from the training data with `validation_data`. """ split = 5 image_val = image_data[split:] numerical_val = numerical_data[split:] regression_val = regression_target[split:] classification_val = classification_target[split:] image_data = image_data[:split] numerical_data = numerical_data[:split] regression_target = regression_target[:split] classification_target = classification_target[:split] model.fit( [image_data, numerical_data], [regression_target, classification_target], # Use your own validation set. validation_data=( [image_val, numerical_val], [regression_val, classification_val], ), epochs=1, batch_size=3, ) """ ## Customized Search Space You can customize your search space. The following figure shows the search space we want to define.
graph LR id1(ImageInput) --> id2(Normalization) id2 --> id3(Image Augmentation) id3 --> id4(Convolutional) id3 --> id5(ResNet V2) id4 --> id6(Merge) id5 --> id6 id7(Input) --> id9(DenseBlock) id6 --> id10(Merge) id9 --> id10 id10 --> id11(Classification Head) id10 --> id12(Regression Head)
""" input_node1 = ak.ImageInput() output_node = ak.Normalization()(input_node1) output_node = ak.ImageAugmentation()(output_node) output_node1 = ak.ConvBlock()(output_node) output_node2 = ak.ResNetBlock(version="v2")(output_node) output_node1 = ak.Merge()([output_node1, output_node2]) input_node2 = ak.Input() output_node2 = ak.DenseBlock()(input_node2) output_node = ak.Merge()([output_node1, output_node2]) output_node1 = ak.ClassificationHead()(output_node) output_node2 = ak.RegressionHead()(output_node) auto_model = ak.AutoModel( inputs=[input_node1, input_node2], outputs=[output_node1, output_node2], overwrite=True, max_trials=2, ) image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32) numerical_data = np.random.rand(num_instances, 20).astype(np.float32) regression_target = np.random.rand(num_instances, 1).astype(np.float32) classification_target = np.random.randint(5, size=num_instances) auto_model.fit( [image_data, numerical_data], [classification_target, regression_target], batch_size=3, epochs=1, ) """ ## Reference [AutoModel](/auto_model/#automodel-class), [ImageInput](/node/#imageinput-class), [Input](/node/#input-class), [DenseBlock](/block/#denseblock-class), [RegressionHead](/block/#regressionhead-class), [ClassificationHead](/block/#classificationhead-class), [CategoricalToNumerical](/block/#categoricaltonumerical-class). """ ================================================ FILE: docs/py/structured_data_classification.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import keras import pandas as pd import autokeras as ak """ ## A Simple Example The first step is to prepare your data. Here we use the [Titanic dataset](https://www.kaggle.com/c/titanic) as an example. 
""" TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv" TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv" train_file_path = keras.utils.get_file("train.csv", TRAIN_DATA_URL) test_file_path = keras.utils.get_file("eval.csv", TEST_DATA_URL) # Load data into numpy arrays train_df = pd.read_csv(train_file_path) test_df = pd.read_csv(test_file_path) y_train = train_df["survived"].values x_train = train_df.drop("survived", axis=1).values y_test = test_df["survived"].values x_test = test_df.drop("survived", axis=1).values """ The second step is to run the [StructuredDataClassifier](/structured_data_classifier). As a quick demo, we set epochs to 10. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the structured data classifier. clf = ak.StructuredDataClassifier( overwrite=True, max_trials=3 ) # It tries 3 different models. # Feed the structured data classifier with training data. clf.fit( x_train, y_train, epochs=10, ) # Predict with the best model. predicted_y = clf.predict(x_test) # Evaluate the best model with testing data. print(clf.evaluate(x_test, y_test)) """ You can also specify the column names and types for the data as follows. The `column_names` is optional if the training data already have the column names, e.g. pandas.DataFrame, CSV file. Any column, whose type is not specified will be inferred from the training data. """ # Initialize the structured data classifier. clf = ak.StructuredDataClassifier( column_names=[ "sex", "age", "n_siblings_spouses", "parch", "fare", "class", "deck", "embark_town", "alone", ], column_types={"sex": "categorical", "fare": "numerical"}, max_trials=10, # It tries 10 different models. overwrite=True, ) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. 
""" clf.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=10, ) """ You can also use your own validation set instead of splitting it from the training data with `validation_data`. """ split = 500 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] clf.fit( x_train, y_train, # Use your own validation set. validation_data=(x_val, y_val), epochs=10, ) """ ## Reference [StructuredDataClassifier](/structured_data_classifier), [AutoModel](/auto_model/#automodel-class), [StructuredDataBlock](/block/#structureddatablock-class), [DenseBlock](/block/#denseblock-class), [StructuredDataInput](/node/#structureddatainput-class), [ClassificationHead](/block/#classificationhead-class), """ ================================================ FILE: docs/py/structured_data_regression.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ from sklearn.datasets import fetch_california_housing import autokeras as ak """ ## A Simple Example The first step is to prepare your data. Here we use the [California housing dataset]( https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) as an example. """ house_dataset = fetch_california_housing() train_size = int(house_dataset.data.shape[0] * 0.9) x_train = house_dataset.data[:train_size] y_train = house_dataset.target[:train_size] x_test = house_dataset.data[train_size:] y_test = house_dataset.target[train_size:] """ The second step is to run the [StructuredDataRegressor](/structured_data_regressor). As a quick demo, we set epochs to 10. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the structured data regressor. reg = ak.StructuredDataRegressor( overwrite=True, max_trials=3 ) # It tries 3 different models. # Feed the structured data regressor with training data. 
reg.fit( x_train, y_train, epochs=10, ) # Predict with the best model. predicted_y = reg.predict(x_test) # Evaluate the best model with testing data. print(reg.evaluate(x_test, y_test)) """ You can also specify the column names and types for the data as follows. The `column_names` is optional if the training data already have the column names, e.g. pandas.DataFrame, CSV file. Any column, whose type is not specified will be inferred from the training data. """ # Initialize the structured data regressor. reg = ak.StructuredDataRegressor( column_names=[ "MedInc", "HouseAge", "AveRooms", "AveBedrms", "Population", "AveOccup", "Latitude", "Longitude", ], column_types={"MedInc": "numerical", "Latitude": "numerical"}, max_trials=10, # It tries 10 different models. overwrite=True, ) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """ reg.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=10, ) """ You can also use your own validation set instead of splitting it from the training data with `validation_data`. """ split = 500 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] reg.fit( x_train, y_train, # Use your own validation set. 
validation_data=(x_val, y_val), epochs=10, ) """ ## Reference [StructuredDataRegressor](/structured_data_regressor), [AutoModel](/auto_model/#automodel-class), [StructuredDataBlock](/block/#structureddatablock-class), [DenseBlock](/block/#denseblock-class), [StructuredDataInput](/node/#structureddatainput-class), [RegressionHead](/block/#regressionhead-class). """ ================================================ FILE: docs/py/text_classification.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import os import keras import numpy as np from sklearn.datasets import load_files import autokeras as ak """ ## A Simple Example The first step is to prepare your data. Here we use the [IMDB dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) as an example. """ dataset = keras.utils.get_file( fname="aclImdb.tar.gz", origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", extract=True, ) # set path to dataset IMDB_DATADIR = os.path.join( os.path.dirname(dataset), "aclImdb_extracted", "aclImdb" ) classes = ["pos", "neg"] train_data = load_files( os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes ) test_data = load_files( os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes ) x_train = np.array(train_data.data)[:100] y_train = np.array(train_data.target)[:100] x_test = np.array(test_data.data)[:100] y_test = np.array(test_data.target)[:100] print(x_train.shape) # (100,) print(y_train.shape) # (100,) print(x_train[0][:50]) # this film was just brilliant casting """ The second step is to run the [TextClassifier](/text_classifier). As a quick demo, we set epochs to 1. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the text classifier. clf = ak.TextClassifier( overwrite=True, max_trials=1 ) # It only tries 1 model as a quick demo. # Feed the text classifier with training data.
clf.fit(x_train, y_train, epochs=1, batch_size=2) # Predict with the best model. predicted_y = clf.predict(x_test) # Evaluate the best model with testing data. print(clf.evaluate(x_test, y_test)) """ ## Validation Data By default, AutoKeras uses the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """ clf.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=1, batch_size=2, ) """ You can also use your own validation set instead of splitting it from the training data with `validation_data`. """ split = 5 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] clf.fit( x_train, y_train, epochs=1, # Use your own validation set. validation_data=(x_val, y_val), batch_size=2, ) """ ## Customized Search Space For advanced users, you may customize your search space by using [AutoModel](/auto_model/#automodel-class) instead of [TextClassifier](/text_classifier). You can configure the [TextBlock](/block/#textblock-class) for some high-level configurations. You can also leave these arguments unspecified, which would leave the different choices to be tuned automatically. See the following example for details. """ input_node = ak.TextInput() output_node = ak.TextBlock()(input_node) output_node = ak.ClassificationHead()(output_node) clf = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) clf.fit(x_train, y_train, epochs=1, batch_size=2) """ ## Reference [TextClassifier](/text_classifier), [AutoModel](/auto_model/#automodel-class), [TextBlock](/block/#textblock-class), [ConvBlock](/block/#convblock-class), [TextInput](/node/#textinput-class), [ClassificationHead](/block/#classificationhead-class).
""" ================================================ FILE: docs/py/text_regression.py ================================================ """shell export KERAS_BACKEND="torch" pip install autokeras """ import os import keras import numpy as np from sklearn.datasets import load_files import autokeras as ak """ To make this tutorial easy to follow, we just treat the IMDB dataset as a regression dataset. It means we will treat the prediction targets of the IMDB dataset, which are 0s and 1s, as numerical values, so that they can be directly used as the regression targets. ## A Simple Example The first step is to prepare your data. Here we use the [IMDB dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) as an example. """ dataset = keras.utils.get_file( fname="aclImdb.tar.gz", origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz", extract=True, ) # set path to dataset IMDB_DATADIR = os.path.join( os.path.dirname(dataset), "aclImdb_extracted", "aclImdb" ) classes = ["pos", "neg"] train_data = load_files( os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes ) test_data = load_files( os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes ) x_train = np.array(train_data.data)[:100] y_train = np.array(train_data.target)[:100] x_test = np.array(test_data.data)[:100] y_test = np.array(test_data.target)[:100] print(x_train.shape) # (100,) print(y_train.shape) # (100,) print(x_train[0][:50]) # this film was just brilliant casting """ The second step is to run the [TextRegressor](/text_regressor). As a quick demo, we set epochs to 1. You can also leave the epochs unspecified for an adaptive number of epochs. """ # Initialize the text regressor. reg = ak.TextRegressor( overwrite=True, max_trials=1 # It only tries 1 model as a quick demo. ) # Feed the text regressor with training data. reg.fit(x_train, y_train, epochs=1, batch_size=2) # Predict with the best model.
predicted_y = reg.predict(x_test) # Evaluate the best model with testing data. print(reg.evaluate(x_test, y_test)) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """ reg.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15, ) """ You can also use your own validation set instead of splitting it from the training data with `validation_data`. """ split = 5 x_val = x_train[split:] y_val = y_train[split:] x_train = x_train[:split] y_train = y_train[:split] reg.fit( x_train, y_train, epochs=1, # Use your own validation set. validation_data=(x_val, y_val), batch_size=2, ) """ ## Customized Search Space For advanced users, you may customize your search space by using [AutoModel](/auto_model/#automodel-class) instead of [TextRegressor](/text_regressor). You can configure the [TextBlock](/block/#textblock-class) for some high-level configurations. You can also do not specify these arguments, which would leave the different choices to be tuned automatically. See the following example for detail. """ input_node = ak.TextInput() output_node = ak.TextBlock()(input_node) output_node = ak.RegressionHead()(output_node) reg = ak.AutoModel( inputs=input_node, outputs=output_node, overwrite=True, max_trials=1 ) reg.fit(x_train, y_train, epochs=1, batch_size=2) """ ## Reference [TextRegressor](/text_regressor), [AutoModel](/auto_model/#automodel-class), [TextBlock](/block/#textblock-class), [ConvBlock](/block/#convblock-class), [TextInput](/node/#textinput-class), [RegressionHead](/block/#regressionhead-class). 
""" ================================================ FILE: docs/requirements.txt ================================================ mkdocs mkdocs-material pygments jupyter pymdown-extensions Sphinx markdown black ================================================ FILE: docs/run_py_files.sh ================================================ #!/bin/bash SUCCESS_FILE="success.txt" FAILURE_FILE="failure.txt" # Clear the files > $SUCCESS_FILE > $FAILURE_FILE for file in py/*.py; do if python3 "$file" > /dev/null 2>&1; then echo "$file" >> $SUCCESS_FILE else echo "$file" >> $FAILURE_FILE fi done ================================================ FILE: docs/templates/about.md ================================================ This package is developed by [DATA LAB](http://faculty.cs.tamu.edu/xiahu/) at Texas A&M University, collaborating with [keras-team](https://github.com/keras-team) for version 1.0 and above. ## Core Team [**Haifeng Jin**](https://github.com/haifeng-jin): Created, designed and implemented the AutoKeras system. Maintainer. [**François Chollet**](https://github.com/fchollet): The API and system architecture design for AutoKeras 1.0. Code reviews for pull requests. [**Qingquan Song**](https://github.com/song3134): Designed the neural architecture search algorithms. Implemented the tabular data classification and regression module. [**Xia "Ben" Hu**](http://faculty.cs.tamu.edu/xiahu/): Project lead and maintainer. ================================================ FILE: docs/templates/index.md ================================================ # drawing ## {{autogenerated}} ================================================ FILE: docs/templates/install.md ================================================ ## Requirements **Python 3**: Follow the TensorFlow install steps to install Python 3. **Pip**: Follow the TensorFlow install steps to install Pip. **PyTorch >= 2.3.0**: AutoKeras is based on Keras. We recommend using the PyTorch backend. 
Please follow [this page](https://pytorch.org/get-started/locally/) to install PyTorch. ## Install AutoKeras AutoKeras only supports **Python 3**. If you followed previous steps to use virtualenv to install tensorflow, you can just activate the virtualenv and use the following command to install AutoKeras. ``` pip install git+https://github.com/keras-team/keras-tuner.git pip install autokeras ``` If you did not use virtualenv, and you use the `python3` command to execute your python program, please use the following command to install AutoKeras. ``` python3 -m pip install git+https://github.com/keras-team/keras-tuner.git python3 -m pip install autokeras ``` ================================================ FILE: docs/templates/stylesheets/extra.css ================================================ :root>* { --md-primary-fg-color: #d00000; --md-accent-fg-color: #d00000; } ================================================ FILE: docs/templates/tutorial/faq.md ================================================ ## How to resume a previously killed run? This feature is controlled by the `overwrite` argument of `AutoModel` or any other task API. It is set to `False` by default, which means it would not overwrite the contents of the directory. In other words, it will continue the previous fit. You can just run the same code again. It will automatically resume the previously killed run. ## How to customize metrics and loss? Please see the code example below. ```python import autokeras as ak clf = ak.ImageClassifier( max_trials=3, metrics=['mse'], loss='mse', ) ``` ## How to use customized metrics to select the best model? By default, AutoKeras uses validation loss as the metric for selecting the best model. Below is a code example of using a customized metric for selecting models. Please read the comments for the details. ```python # Implement your customized metric according to the tutorial.
# https://keras.io/api/metrics/#creating-custom-metrics import autokeras as ak import keras_tuner def f1_score(y_true, y_pred): ... clf = ak.ImageClassifier( max_trials=3, # Wrap the function into a Keras Tuner Objective # and pass it to AutoKeras. # Direction can be 'min' or 'max' # meaning we want to minimize or maximize the metric. # 'val_f1_score' is just the function name or the metric name # with a 'val_' prefix added. objective=keras_tuner.Objective('val_f1_score', direction='max'), # Include it as one of the metrics. metrics=[f1_score], ) ``` ================================================ FILE: docs/templates/tutorial/overview.md ================================================ # AutoKeras 1.0 Tutorial ## Supported Tasks AutoKeras supports several tasks with an extremely simple interface. You can click the links below to see the detailed tutorial for each task. **Supported Tasks**: [Image Classification](/tutorial/image_classification) [Image Regression](/tutorial/image_regression) [Text Classification](/tutorial/text_classification) [Text Regression](/tutorial/text_regression) [Structured Data Classification](/tutorial/structured_data_classification) [Structured Data Regression](/tutorial/structured_data_regression) ## Multi-Task and Multi-Modal Data If you are dealing with a multi-task or multi-modal dataset, you can refer to this [tutorial](/tutorial/multi) for details. ## Customized Model Follow this [tutorial](/tutorial/customized) to use AutoKeras building blocks to quickly construct your own model. With these blocks, you only need to specify the high-level architecture of your model. AutoKeras would search for the best detailed configuration for you. Moreover, you can override the base classes to create your own block. The following are the links to the documentation of the predefined input nodes and blocks in AutoKeras.
**Nodes**: [ImageInput](/node/#imageinput-class) [Input](/node/#input-class) [TextInput](/node/#textinput-class) [StructuredDataInput](/node/#structureddatainput-class) **Blocks**: [ImageAugmentation](/block/#imageaugmentation-class) [Normalization](/block/#normalization-class) [ConvBlock](/block/#convblock-class) [DenseBlock](/block/#denseblock-class) [Embedding](/block/#embedding-class) [Merge](/block/#merge-class) [ResNetBlock](/block/#resnetblock-class) [RNNBlock](/block/#rnnblock-class) [SpatialReduction](/block/#spatialreduction-class) [TemporalReduction](/block/#temporalreduction-class) [XceptionBlock](/block/#xceptionblock-class) [ImageBlock](/block/#imageblock-class) [TextBlock](/block/#textblock-class) [StructuredDataBlock](/block/#structureddatablock-class) [ClassificationHead](/block/#classificationhead-class) [RegressionHead](/block/#regressionhead-class) ## Export Model You can follow this [tutorial](/tutorial/export) to export the best model. ================================================ FILE: docs/tutobooks.py ================================================ """Keras tutobooks implementation. A tutobook is a tutorial available simultaneously as a notebook, as a Python script, and as a nicely rendered webpage. Its source-of-truth (for manual edition and version control) is its Python script form, but you can also create one by starting from a notebook and converting it with the command `nb2py`. Text cells are stored in markdown-formatted comment blocks. the first line (starting with " * 3) may optionally contain a special annotation, one of: - invisible: do not render this block. - shell: execute this block while prefixing each line with `!`. 
The script form should start with a header with the following fields: Title: (title of the example) Author: (could be `Authors:` as well, and may contain markdown links) Date created: (date in yyyy/mm/dd format) Last modified: (date in yyyy/mm/dd format) Description: (one-line text description) ## How to add a new code example to Keras.io You would typically start from an existing notebook. Save it to disk (let's say as `path_to_your_nb.ipynb`). `cd` to the `keras-io/scripts/` directory. Then run: ``` python tutobooks.py nb2py path_to_your_nb.ipynb ../examples/your_example.py ``` This will create the file `examples/your_example.py`. Open it, fill in the headers, and generally edit it so that it looks nice. NOTE THAT THE CONVERSION SCRIPT MAY MAKE MISTAKES IN ITS ATTEMPTS TO SHORTEN LINES. MAKE SURE TO PROOFREAD THE GENERATED .py IN FULL. Or alternatively, make sure to keep your lines reasonably-sized (<90 char) to start with, so that the script won't have to shorten them. You can then preview what it looks like when converted back again to ipynb by running: ``` python tutobooks.py py2nb ../examples/your_example.py preview.ipynb ``` NOTE THAT THIS COMMAND WILL ERROR OUT IF ANY CELL TAKES TOO LONG TO EXECUTE. In that case, make your code lighter/faster. Remember that examples are meant to demonstrate workflows, not train state-of-the-art models. They should stay very lightweight. Open the generated `preview.ipynb` and make sure it looks like what you expect. If not, keep editing `your_example.py` until it does. Finally, submit a PR adding `examples/your_example.py`.
""" import json import os import random import shutil import sys from pathlib import Path TIMEOUT = 60 * 60 MAX_LOC = 300 def nb_to_py(nb_path, py_path): f = open(nb_path) content = f.read() f.close() nb = json.loads(content) py = '"""\n' py += "Title: FILLME\n" py += "Author: FILLME\n" py += "Date created: FILLME\n" py += "Last modified: FILLME\n" py += "Description: FILLME\n" py += '"""\n' for cell in nb["cells"]: if cell["cell_type"] == "code": # Is it a shell cell? if ( cell["source"] and cell["source"][0] and cell["source"][0][0] == "!" ): # It's a shell cell py += '"""shell\n' py += "".join(cell["source"]) + "\n" py += '"""\n\n' else: # It's a Python cell py += "".join(cell["source"]) + "\n\n" elif cell["cell_type"] == "markdown": py += '"""\n' py += "".join(cell["source"]) + "\n" py += '"""\n\n' # Save file f = open(py_path, "w") f.write(py) f.close() # Format file with Black os.system("black " + py_path) # Shorten lines py = open(py_path).read() try: py = _shorten_lines(py) finally: f = open(py_path, "w") f.write(py) f.close() def py_to_nb(py_path, nb_path, fill_outputs=True): f = open(py_path) py = f.read() f.close() # validate(py) # header, _, py, tag = _get_next_script_element(py) # attributes = _parse_header(header) cells = [] loc = 0 # Write first header cell # header_cell = { # "cell_type": "markdown", # "source": [ # "# " + attributes["title"] + "\n", # "\n", # "**" + attributes["auth_field"] + ":** " + attributes["author"] +"
\n", # "**Date created:** " + attributes["date_created"] + "
\n", # "**Last modified:** " + attributes["last_modified"] + "
\n", # "**Description:** " + attributes["description"], # ], # "metadata": {"colab_type": "text"}, # } # cells.append(header_cell) while py: e, cell_type, py, tag = _get_next_script_element(py) lines = e.split("\n") if all(line == "" for line in lines): continue if lines and not lines[0]: lines = lines[1:] source = [line + "\n" for line in lines] # Drop last newline char if source and not source[-1].strip(): source = source[:-1] if tag == "shell": source = ["!" + line for line in source] cell_type = "code" if tag != "invisible" and source: cell = {"cell_type": cell_type, "source": source} if cell_type == "code": cell["outputs"] = [] cell["metadata"] = {"colab_type": "code"} cell["execution_count"] = 0 loc += _count_locs(source) else: cell["metadata"] = {"colab_type": "text"} cells.append(cell) notebook = {} for key in NB_BASE.keys(): notebook[key] = NB_BASE[key] notebook["metadata"]["colab"]["name"] = str(py_path).split("/")[-1][:-3] notebook["cells"] = cells if loc > MAX_LOC: raise ValueError( "Found %d lines of code, but expected fewer than %d" % (loc, MAX_LOC) ) f = open(nb_path, "w") f.write(json.dumps(notebook, indent=1, sort_keys=True)) f.close() if fill_outputs: print("Generating ipynb") parent_dir = Path(nb_path).parent current_files = os.listdir(parent_dir) try: os.system( "jupyter nbconvert --to notebook --execute --debug " + str(nb_path) + " --inplace" + " --ExecutePreprocessor.timeout=" + str(TIMEOUT) ) finally: new_files = os.listdir(parent_dir) for fname in new_files: if fname not in current_files: fpath = parent_dir / fname if os.path.isdir(fpath): print("Removing created folder:", fname) shutil.rmtree(fpath) else: print("Removing created file:", fname) os.remove(fpath) def nb_to_md(nb_path, md_path, img_dir, working_dir=None): img_exts = ("png", "jpg", "jpeg") # Assumes an already populated notebook. 
assert str(md_path).endswith(".md") current_dir = os.getcwd() original_img_dir = str(img_dir) if original_img_dir.endswith("/"): original_img_dir = original_img_dir[:-1] img_dir = os.path.abspath(img_dir) nb_path = os.path.abspath(nb_path) nb_fname = str(nb_path).split("/")[-1] del_working_dir = False if working_dir is None: del_working_dir = True working_dir = "tmp_" + str(random.randint(1e6, 1e7)) if not os.path.exists(working_dir): os.makedirs(working_dir) print("Using working_dir:", working_dir) os.chdir(working_dir) shutil.copyfile(nb_path, nb_fname) md_name = str(md_path).split("/")[-1][:-3] target_md = md_name + ".md" img_dir = Path(img_dir) / md_name if not os.path.exists(img_dir): os.makedirs(img_dir) os.system( # "jupyter nbconvert --to markdown --execute --debug " "jupyter nbconvert --to markdown " + nb_fname + " --output " + target_md # + " --ExecutePreprocessor.timeout=" # + str(TIMEOUT) ) tmp_img_dir = md_name + "_files" if os.path.exists(tmp_img_dir): for fname in os.listdir(tmp_img_dir): if fname.endswith(img_exts): src = Path(tmp_img_dir) / fname target = Path(img_dir) / fname print("copy", src, "to", target) shutil.copyfile(src, target) os.chdir(current_dir) md_content = open(Path(working_dir) / (md_name + ".md")).read() for ext in img_exts: md_content = md_content.replace( "![" + ext + "](" + md_name + "_files", "![" + ext + "](" + original_img_dir + "/" + md_name, ) md_content = _make_output_code_blocks(md_content) open(md_path, "w").write(md_content) if del_working_dir: shutil.rmtree(working_dir) def py_to_md(py_path, nb_path, md_path, img_dir, working_dir=None): py_to_nb(py_path, nb_path, fill_outputs=False) nb_to_md(nb_path, md_path, img_dir, working_dir=working_dir) def validate(py): """Validate the format of a tutobook script. 
Specifically: - validate headers - validate style with black """ lines = py.split("\n") if not lines[0].startswith('"""'): raise ValueError('Missing `"""`-fenced header at top of script.') if not lines[1].startswith("Title: "): raise ValueError("Missing `Title:` field.") if not lines[2].startswith("Author: ") and not lines[2].startswith( "Authors: " ): raise ValueError("Missing `Author:` field.") if not lines[3].startswith("Date created: "): raise ValueError("Missing `Date created:` field.") if not lines[4].startswith("Last modified: "): raise ValueError("Missing `Last modified:` field.") if not lines[5].startswith("Description: "): raise ValueError("Missing `Description:` field.") description = lines[5][len("Description: ") :] if not description: raise ValueError("Missing `Description:` field content.") if not description[0] == description[0].upper(): raise ValueError("Description field content must be capitalized.") if not description[-1] == ".": raise ValueError("Description field content must end with a period.") if len(description) > 100: raise ValueError( "Description field content must be less than 100 chars." ) for i, line in enumerate(lines): if line.startswith('"""') and line.endswith('"""') and len(line) > 3: raise ValueError( 'Do not use single line `"""`-fenced comments. ' "Encountered at line %d" % (i,) ) for i, line in enumerate(lines): if line.endswith(" "): raise ValueError( "Found trailing space on line %d; line: `%s`" % (i, line) ) # Validate style with black fpath = "/tmp/" + str(random.randint(1e6, 1e7)) + ".py" f = open(fpath, "w") pre_formatting = "\n".join(lines) f.write(pre_formatting) f.close() os.system("black " + fpath) f = open(fpath) formatted = f.read() f.close() os.remove(fpath) if formatted != pre_formatting: raise ValueError( "You python file did not follow `black` conventions. " "Run `black your_file.py` to autoformat it." 
) def _count_locs(lines): loc = 0 string_open = False for line in lines: line = line.strip() if not line or line.startswith("#"): continue if not string_open: if not line.startswith('"""'): loc += 1 else: if not line.endswith('"""'): string_open = True else: if line.startswith('"""'): string_open = False return loc def _shorten_lines(py): max_len = 90 lines = [] for line in py.split("\n"): if len(line) <= max_len: lines.append(line) continue i = 0 while len(line) > max_len: line = line.lstrip() if " " not in line[1:]: lines.append(line) break else: short_line = line[:max_len] line = line[max_len:] if " " in short_line: reversed_short_line = short_line[::-1] index = reversed_short_line.find(" ") + 1 line = short_line[-index:] + line short_line = short_line[:-index] lines.append(short_line.lstrip()) i += 1 if i > 10: raise lines.append(line.lstrip()) return "\n".join(lines) def _get_next_script_element(py): lines = py.split("\n") assert lines elines = [] i = 0 tag = None if lines[0].startswith('"""'): assert len(lines) >= 2 etype = "markdown" if len(lines[0]) > 3: tag = lines[0][3:] if tag not in ["shell", "invisible"]: raise ValueError("Found unknown cell tag:", tag) lines = lines[1:] else: etype = "code" for i, line in enumerate(lines): if line.startswith('"""'): break else: elines.append(line) if etype == "markdown": py = "\n".join(lines[i + 1 :]) else: py = "\n".join(lines[i:]) e = "\n".join(elines) return e, etype, py, tag def _parse_header(header): lines = header.split("\n") title = lines[0][len("Title: ") :] author_line = lines[1] if author_line.startswith("Authors"): author = author_line[len("Authors: ") :] auth_field = "Authors" else: author = author_line[len("Author: ") :] auth_field = "Author" date_created = lines[2][len("Date created: ") :] last_modified = lines[3][len("Last modified: ") :] description = lines[4][len("Description: ") :] return { "title": title, "author": author, "auth_field": auth_field, "date_created": date_created, "last_modified": 
last_modified, "description": description, } def _make_output_code_blocks(md): lines = md.split("\n") output_lines = [] final_lines = [] is_inside_backticks = False def is_output_line(line, prev_line, output_lines): if line.startswith(" ") and len(line) >= 5: if output_lines or (lines[i - 1].strip() == "" and line.strip()): return True return False def flush(output_lines, final_lines): final_lines.append('
') final_lines.append("```") if len(output_lines) == 1: line = output_lines[0] final_lines.append(line[4:]) else: for line in output_lines: final_lines.append(line[4:]) final_lines.append("```") final_lines.append("
") for i, line in enumerate(lines): if line.startswith("```"): is_inside_backticks = not is_inside_backticks final_lines.append(line) continue if is_inside_backticks: final_lines.append(line) continue if i > 0 and is_output_line(line, lines[-1], output_lines): output_lines.append(line) elif not line: if output_lines: if output_lines[-1]: output_lines.append(line) else: final_lines.append(line) else: if output_lines: flush(output_lines, final_lines) output_lines = [] final_lines.append(line) if output_lines: flush(output_lines, final_lines) return "\n".join(final_lines) NB_BASE = { "metadata": { "colab": { "collapsed_sections": [], "name": "", # FILL ME "private_outputs": False, "provenance": [], "toc_visible": True, }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3", }, "language_info": { "codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0", }, }, "nbformat": 4, "nbformat_minor": 0, } if __name__ == "__main__": cmd = sys.argv[1] if cmd not in {"nb2py", "py2nb"}: raise ValueError( "Specify a command: either " "`nb2py source_filename.ipynb target_filename.py` or " "`py2nb source_filename.py target_file name.ipynb" ) if len(sys.argv) < 4: raise ValueError("Specify a source filename and a target filename") source = sys.argv[2] target = sys.argv[3] if cmd == "py2nb": if not source.endswith(".py"): raise ValueError( "The source filename should be a Python file. Got:", source ) if not target.endswith(".ipynb"): raise ValueError( "The target filename should be a notebook file. Got:", target ) py_to_nb(source, target) if cmd == "nb2py": if not source.endswith(".ipynb"): raise ValueError( "The source filename should be a notebook file. Got:", source ) if not target.endswith(".py"): raise ValueError( "The target filename should be a Python file. 
# Library import
import keras
import numpy as np

import autokeras as ak

# Prepare example data - 1D features, first half of each set labelled 1.
num_instances = 100
num_features = 5
x_train = np.random.rand(num_instances, num_features).astype(np.float32)
y_train = np.zeros(num_instances).astype(np.float32)
y_train[0 : int(num_instances / 2)] = 1
x_test = np.random.rand(num_instances, num_features).astype(np.float32)
y_test = np.zeros(num_instances).astype(np.float32)
y_test[0 : int(num_instances / 2)] = 1

# This step is very important: the ConvBlock needs a trailing channel axis,
# so both the training and the test inputs are expanded to 3 dimensions.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
print(x_train.shape)
print(y_train.shape)

# Prepare Automodel for search
input_node = ak.Input()
output_node = ak.ConvBlock()(input_node)
# output_node = ak.DenseBlock()(output_node) #optional
# output_node = ak.SpatialReduction()(output_node) #optional
output_node = ak.ClassificationHead(num_classes=2, multi_label=True)(
    output_node
)

auto_model = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)

# Search
auto_model.fit(x_train, y_train, epochs=1)
print(auto_model.evaluate(x_test, y_test))

# Export as a Keras Model
model = auto_model.export_model()
# summary() prints the table itself and returns None.
model.summary()

# print model as image
keras.utils.plot_model(
    model, show_shapes=True, expand_nested=True, to_file="name.png"
)
def datenum_to_datetime(datenum):
    """
    Convert Matlab datenum into Python datetime.

    The integer part of `datenum` selects the calendar day and the
    fractional part supplies hours, minutes and seconds; 366 days are then
    subtracted. If the conversion raises, the result for datenum 700000 is
    returned instead.
    """
    day_part = datenum % 1
    hour_part = day_part % 1 * 24
    minute_part = hour_part % 1 * 60
    second_part = minute_part % 1 * 60
    try:
        moment = datetime.fromordinal(int(datenum))
        # Apply the offsets one at a time, in the same order as a chained sum.
        moment = moment + timedelta(days=int(day_part))
        moment = moment + timedelta(hours=int(hour_part))
        moment = moment + timedelta(minutes=int(minute_part))
        moment = moment + timedelta(seconds=round(second_part))
        return moment - timedelta(days=366)
    except Exception:
        # Fall back to a fixed date for values that cannot be converted.
        return datenum_to_datetime(700000)
* Images are resized to 128x128 to standardize shape and conserve memory * RGB images are converted to grayscale to standardize shape * Ages are converted to ints """ def df2numpy(train_set): images = [] for img_path in train_set["full_path"]: img = ( Image.open("./drive/My Drive/mlin/celebs/imdb/" + img_path[0]) .resize((128, 128)) .convert("L") ) images.append(np.asarray(img, dtype="int32")) image_inputs = np.array(images) ages = train_set["age"].astype("int").to_numpy() return image_inputs, ages train_set = df[df["full_path"] < "02"].sample(200) train_imgs, train_ages = df2numpy(train_set) test_set = df[df["full_path"] < "02"].sample(100) test_imgs, test_ages = df2numpy(test_set) """ ### **Training using AutoKeras** """ # Initialize the image regressor reg = ak.ImageRegressor(max_trials=15) # AutoKeras tries 15 different models. # Find the best model for the given training data reg.fit(train_imgs, train_ages) # Predict with the chosen model: # predict_y = reg.predict(test_images) # Uncomment if required # Evaluate the chosen model with testing data print(reg.evaluate(train_imgs, train_ages)) """ ### **Validation Data** By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use validation_split to specify the percentage. """ reg.fit( train_imgs, train_ages, # Split the training data and use the last 15% as validation data. validation_split=0.15, epochs=3, ) """ You can also use your own validation set instead of splitting it from the training data with validation_data. """ split = 460000 x_val = train_imgs[split:] y_val = train_ages[split:] x_train = train_imgs[:split] y_train = train_ages[:split] reg.fit( x_train, y_train, # Use your own validation set. validation_data=(x_val, y_val), epochs=3, ) """ ### **Customized Search Space** For advanced users, you may customize your search space by using AutoModel instead of ImageRegressor. 
You can configure the ImageBlock for some high-level configurations, e.g., block_type for the type of neural network to search, normalize for whether to do data normalization, augment for whether to do data augmentation. You can also choose not to specify these arguments, which would leave the different choices to be tuned automatically. See the following example for detail. """ input_node = ak.ImageInput() output_node = ak.ImageBlock( # Only search ResNet architectures. block_type="resnet", # Normalize the dataset. normalize=True, # Do not do data augmentation. augment=False, )(input_node) output_node = ak.RegressionHead()(output_node) reg = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10) reg.fit(x_train, y_train, epochs=3) """ The usage of AutoModel is similar to the functional API of Keras. Basically, you are building a graph, whose edges are blocks and the nodes are intermediate outputs of blocks. To add an edge from input_node to output_node with output_node = ak.some_block(input_node). You can even also use more fine grained blocks to customize the search space even further. See the following example. 
""" input_node = ak.ImageInput() output_node = ak.Normalization()(input_node) output_node = ak.ImageAugmentation(translation_factor=0.3)(output_node) output_node = ak.ResNetBlock(version="v2")(output_node) output_node = ak.RegressionHead()(output_node) clf = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10) clf.fit(x_train, y_train, epochs=3) """ ## References [Main Reference Notebook](https://gist.github.com/mapmeld/98d1e9839f2d1f9c4ee197953661ed07), [Dataset](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/), [ImageRegressor](/image_regressor), [ResNetBlock](/block/#resnetblock-class), [ImageInput](/node/#imageinput-class), [AutoModel](/auto_model/#automodel-class), [ImageBlock](/block/#imageblock-class), [Normalization](/preprocessor/#normalization-class), [ImageAugmentation](/preprocessor/#image-augmentation-class), [RegressionHead](/head/#regressionhead-class). """ ================================================ FILE: examples/cifar10.py ================================================ from keras.datasets import cifar10 import autokeras as ak # Prepare the dataset. (x_train, y_train), (x_test, y_test) = cifar10.load_data() # Initialize the ImageClassifier. clf = ak.ImageClassifier(max_trials=3) # Search for the best model. clf.fit(x_train, y_train, epochs=5) # Evaluate on the testing data. print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1])) ================================================ FILE: examples/imdb.py ================================================ """ Search for a good model for the [IMDB]( https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) dataset. 
""" import keras import numpy as np import autokeras as ak def imdb_raw(): max_features = 20000 index_offset = 3 # word index offset (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data( num_words=max_features, index_from=index_offset ) x_train = x_train y_train = y_train.reshape(-1, 1) x_test = x_test y_test = y_test.reshape(-1, 1) word_to_id = keras.datasets.imdb.get_word_index() word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()} word_to_id[""] = 0 word_to_id[""] = 1 word_to_id[""] = 2 id_to_word = {value: key for key, value in word_to_id.items()} x_train = list( map(lambda sentence: " ".join(id_to_word[i] for i in sentence), x_train) ) x_test = list( map(lambda sentence: " ".join(id_to_word[i] for i in sentence), x_test) ) x_train = np.array(x_train, dtype=np.str) x_test = np.array(x_test, dtype=np.str) return (x_train, y_train), (x_test, y_test) # Prepare the data. (x_train, y_train), (x_test, y_test) = imdb_raw() print(x_train.shape) # (25000,) print(y_train.shape) # (25000, 1) print(x_train[0][:50]) # this film was just brilliant casting # Initialize the TextClassifier clf = ak.TextClassifier(max_trials=3) # Search for the best model. clf.fit(x_train, y_train, epochs=2, batch_size=8) # Evaluate on the testing data. print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test))) ================================================ FILE: examples/mnist.py ================================================ """ Search for a good model for the [MNIST](https://keras.io/datasets/#mnist-database-of-handwritten-digits) dataset. """ from keras.datasets import mnist import autokeras as ak # Prepare the dataset. (x_train, y_train), (x_test, y_test) = mnist.load_data() print(x_train.shape) # (60000, 28, 28) print(y_train.shape) # (60000,) print(y_train[:3]) # array([7, 2, 1], dtype=uint8) # Initialize the ImageClassifier. clf = ak.ImageClassifier(max_trials=3) # Search for the best model. 
clf.fit(x_train, y_train, epochs=10) # Evaluate on the testing data. print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test))) ================================================ FILE: examples/new_pop.py ================================================ """shell pip install autokeras """ import pandas as pd import autokeras as ak """ ## Social Media Articles Example Regression tasks estimate a numeric variable, such as the price of a house or a person's age. This example estimates the view counts for an article on social media platforms, trained on a [News Popularity]( https://archive.ics.uci.edu/ml/datasets/ News+Popularity+in+Multiple+Social+Media+Platforms) dataset collected from 2015-2016. First, prepare your text data in a `numpy.ndarray` format. """ # converting from other formats (such as pandas) to numpy df = pd.read_csv("./News_Final.csv") text_inputs = df.Title.to_numpy(dtype="str") media_success_outputs = df.Facebook.to_numpy(dtype="int") """ Next, initialize and train the [TextRegressor](/text_regressor). """ # Initialize the text regressor reg = ak.TextRegressor(max_trials=15) # AutoKeras tries 15 different models. # Find the best model for the given training data reg.fit(text_inputs, media_success_outputs) # Predict with the chosen model: predict_y = reg.predict(text_inputs) ================================================ FILE: examples/reuters.py ================================================ """shell !pip install -q -U pip !pip install -q -U autokeras==1.0.8 !pip install -q git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1 """ import keras import numpy as np from keras.datasets import reuters import autokeras as ak """ Search for a good model for the [Reuters](https://keras.io/ja/datasets/#_5) dataset. """ # Prepare the dataset. 
def reuters_raw(max_features=20000):
    """Load the Reuters newswire dataset as raw text.

    The newswires come from `keras.datasets.reuters` as sequences of word
    ids (ids shifted by `index_offset`). Each sequence is decoded back into
    a space-separated string of words.

    Args:
        max_features: Vocabulary size passed to `load_data` as `num_words`.

    Returns:
        A tuple `((x_train, y_train), (x_test, y_test))` where the `x` values
        are 1-D numpy arrays of strings and the `y` values are the label
        arrays reshaped to `(-1, 1)`.
    """
    index_offset = 3  # word index offset

    (x_train, y_train), (x_test, y_test) = reuters.load_data(
        num_words=max_features, index_from=index_offset
    )
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    word_to_id = reuters.get_word_index()
    word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()}
    # Ids below `index_offset` are reserved by `load_data` for special tokens.
    # Each one needs its own key, otherwise the start-of-sequence id (1) has
    # no entry in `id_to_word` and decoding fails with a KeyError.
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value: key for key, value in word_to_id.items()}

    def decode(sentence):
        # Turn one sequence of word ids back into a sentence.
        return " ".join(id_to_word[i] for i in sentence)

    # `np.str` was only an alias of the builtin `str` and has been removed
    # from NumPy, so use the builtin directly.
    x_train = np.array([decode(sentence) for sentence in x_train], dtype=str)
    x_test = np.array([decode(sentence) for sentence in x_test], dtype=str)
    return (x_train, y_train), (x_test, y_test)


# Prepare the data.
(x_train, y_train), (x_test, y_test) = reuters_raw()
print(x_train.shape)  # (8982,)
print(y_train.shape)  # (8982, 1)
print(x_train[0][:50])  # <START> <UNK> <UNK> said as a result of its decemb

# Initialize the TextClassifier
clf = ak.TextClassifier(
    max_trials=5,
    overwrite=True,
)

# Callback to avoid overfitting with the EarlyStopping.
cbs = [
    keras.callbacks.EarlyStopping(patience=3),
]

# Search for the best model.
# The keyword is `callbacks` (plural); a misspelled keyword is forwarded to
# the underlying Keras `fit` call and rejected there.
clf.fit(x_train, y_train, epochs=10, callbacks=cbs)
# Evaluate on the testing data.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test))) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "autokeras" authors = [ {name = "Keras team", email = "keras-users@googlegroups.com"}, ] description = "AutoML for deep learning" readme = "README.md" requires-python = ">=3.8" license = {text = "Apache License 2.0"} dynamic = ["version"] classifiers = [ "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Mathematics", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Software Development :: Libraries", ] dependencies = [ "packaging", "keras-tuner>=1.4.0", "keras>=3.0.0", "dm-tree", ] [project.optional-dependencies] tests = [ "pandas", "pytest>=4.4.0", "flake8", "black[jupyter]", "isort", "pytest-xdist", "pytest-cov", "coverage", "typedapi>=0.2,<0.3", "scikit-learn", ] [project.urls] Home = "https://autokeras.com/" Repository = "https://github.com/keras-team/autokeras" [tool.setuptools.dynamic] version = {attr = "autokeras.__init__.__version__"} [tool.setuptools.packages.find] include = ["autokeras", "autokeras.*"] [tool.black] line-length = 80 target-version = [] [tool.isort] profile = "black" known_first_party = ["autokeras", "tests"] default_section = "THIRDPARTY" line_length = 80 force_single_line = "True" ================================================ FILE: setup.cfg ================================================ [tool:pytest] addopts=-v -p no:warnings --durations=10 --log-cli-level=CRITICAL # Do not run tests in the build folder 
norecursedirs= build [coverage:report] exclude_lines = pragma: no cover @abstract raise NotImplementedError omit = *test* autokeras/prototype/* [flake8] # imported but unused in __init__.py, that's ok. per-file-ignores = **/__init__.py:F401 ignore = E203, W503 max-line-length = 80 ================================================ FILE: shell/contributors.py ================================================ import base64 import json import os import sys from io import BytesIO import requests from PIL import Image def main(directory): contributors = [] for contributor in json.load(open("contributors.json")): if contributor["type"] != "User": continue if contributor["login"] == "codacy-badger": continue contributors.append(contributor) size = 36 gap = 3 elem_per_line = 22 width = elem_per_line * (size + gap) + gap height = ((len(contributors) - 1) // elem_per_line + 1) * (size + gap) + gap html = ' ' + '' ) temp = temp.format( html_url=contributor["html_url"], x=x, y=y, img_str=img_str, login=contributor["login"], ) html += temp + "\n" html += "" print(html) if __name__ == "__main__": main(sys.argv[1]) ================================================ FILE: shell/contributors.sh ================================================ # node shell/generate_json.js gh api -H "Accept: application/vnd.github+json" /repos/keras-team/autokeras/contributors --paginate > response.json sed "s/\]\[/,/g" response.json > contributors.json rm response.json mkdir avatars python shell/contributors.py avatars > docs/templates/img/contributors.svg rm contributors.json rm -rf avatars ================================================ FILE: shell/copyright.txt ================================================ # Copyright 2020 The AutoKeras Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: shell/cov.sh
================================================
# Run the tests and write HTML and XML (cov.xml) coverage reports.
pytest tests --cov-report html --cov-report xml:cov.xml --cov=autokeras


================================================
FILE: shell/coverage.sh
================================================
# Run pytest with an XML coverage report; any arguments (e.g. a test path)
# are forwarded to pytest unchanged.
pytest --cov-report xml:cov.xml --cov autokeras "$@"


================================================
FILE: shell/docs.sh
================================================
#!/usr/bin/env bash
# Build the documentation site and force-push it to the gh-pages branch.

# Stop here if the docs directory is missing, so the build steps below never
# run from the wrong working directory.
cd docs || exit 1
python autogen.py
mkdocs build
cd ..
sh shell/format.sh
echo "autokeras.com" > docs/site/CNAME
# Commit the generated site on a temporary branch, split it out into
# gh-pages, publish it, then remove both helper branches.
git checkout -b gh-pages-temp
git add -f docs/site
git commit -m "gh-pages update"
git subtree split --prefix docs/site -b gh-pages
git push -f origin gh-pages:gh-pages
git branch -D gh-pages
git checkout master
git branch -D gh-pages-temp


================================================
FILE: shell/format.sh
================================================
isort .
black .

# Prepend the license header to every Python file that does not yet contain
# the word "Copyright".
for i in $(find autokeras benchmark -name '*.py')
do
  if ! grep -q Copyright "$i"
  then
    echo "$i"
    cat shell/copyright.txt "$i" >"$i.new" && mv "$i.new" "$i"
  fi
done

flake8 .


================================================
FILE: shell/generate_json.js
================================================
const { Octokit } = require("@octokit/rest");
const fs = require('fs')

const octokit = new Octokit({});

// Fetch every page of the repository's contributor list and store it as
// contributors.json in the current directory.
octokit.paginate(octokit.repos.listContributors,{
    owner: 'keras-team',
    repo: 'autokeras',
}).then((contributors) => {
    fs.writeFileSync('contributors.json', JSON.stringify(contributors))
}).catch((error) => {
    // Report the failure and exit non-zero instead of leaving an unhandled
    // promise rejection.
    console.error(error)
    process.exitCode = 1
});


================================================
FILE: shell/lint.sh
================================================
# Check import order, code style and formatting; exit with status 1 on the
# first check that fails.
if ! isort -c .
then
  echo "Please run \"sh shell/format.sh\" to format the code."
  exit 1
fi
if ! flake8 .
then
  echo "Please fix the code style issue."
  exit 1
fi
if ! black --check .
then
  echo "Please run \"sh shell/format.sh\" to format the code."
  exit 1
fi
# Every Python file must contain the license header.
for i in $(find autokeras benchmark -name '*.py') # or whatever other pattern...
do
  if ! grep -q Copyright "$i"
  then
    echo "Please run \"sh shell/format.sh\" to format the code."
    exit 1
  fi
done


================================================
FILE: shell/perf.sh
================================================
# Run the performance tests and keep a copy of the output in perf_output.txt.
pytest tests/performance.py | tee perf_output.txt


================================================
FILE: shell/pre-commit.sh
================================================
#!/usr/bin/env bash
# Build the formatting image and run it against the current checkout.
set -e

export DOCKER_BUILDKIT=1
docker build -t autokeras_formatting -f docker/pre-commit.Dockerfile .
docker run --rm -t -v "$(pwd -P):/autokeras" autokeras_formatting


================================================
FILE: shell/pypi.sh
================================================
#!/usr/bin/env bash
# Remove old build artifacts; -f keeps this quiet when dist/ is empty.
rm -f dist/*
python -m build
twine upload --repository-url https://upload.pypi.org/legacy/ dist/*