Repository: keras-team/autokeras
Branch: master
Commit: a2446cf16edc
Files: 184
Total size: 601.8 KB

Directory structure:
gitextract_87bybot9/

├── .devcontainer/
│   ├── Dockerfile
│   ├── devcontainer.json
│   └── setup.sh
├── .dockerignore
├── .github/
│   ├── CODEOWNERS
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── custom.md
│   │   ├── feature_request.md
│   │   └── new-task-template.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── dependabot.yml
│   └── workflows/
│       └── actions.yml
├── .gitignore
├── LICENSE
├── README.md
├── RELEASE.md
├── autokeras/
│   ├── __init__.py
│   ├── adapters/
│   │   ├── __init__.py
│   │   ├── input_adapters.py
│   │   ├── input_adapters_test.py
│   │   ├── output_adapters.py
│   │   └── output_adapters_test.py
│   ├── analysers/
│   │   ├── __init__.py
│   │   ├── input_analysers.py
│   │   ├── input_analysers_test.py
│   │   ├── output_analysers.py
│   │   └── output_analysers_test.py
│   ├── auto_model.py
│   ├── auto_model_test.py
│   ├── blocks/
│   │   ├── __init__.py
│   │   ├── basic.py
│   │   ├── basic_test.py
│   │   ├── heads.py
│   │   ├── heads_test.py
│   │   ├── preprocessing.py
│   │   ├── preprocessing_test.py
│   │   ├── reduction.py
│   │   ├── reduction_test.py
│   │   ├── wrapper.py
│   │   └── wrapper_test.py
│   ├── conftest.py
│   ├── engine/
│   │   ├── __init__.py
│   │   ├── adapter.py
│   │   ├── adapter_test.py
│   │   ├── analyser.py
│   │   ├── analyser_test.py
│   │   ├── block.py
│   │   ├── block_test.py
│   │   ├── head.py
│   │   ├── head_test.py
│   │   ├── hyper_preprocessor.py
│   │   ├── io_hypermodel.py
│   │   ├── named_hypermodel.py
│   │   ├── node.py
│   │   ├── node_test.py
│   │   ├── preprocessor.py
│   │   ├── serializable.py
│   │   ├── tuner.py
│   │   └── tuner_test.py
│   ├── graph.py
│   ├── graph_test.py
│   ├── hyper_preprocessors.py
│   ├── hyper_preprocessors_test.py
│   ├── integration_tests/
│   │   ├── __init__.py
│   │   ├── functional_api_test.py
│   │   ├── io_api_test.py
│   │   └── task_api_test.py
│   ├── keras_layers.py
│   ├── keras_layers_test.py
│   ├── nodes.py
│   ├── nodes_test.py
│   ├── pipeline.py
│   ├── pipeline_test.py
│   ├── preprocessors/
│   │   ├── __init__.py
│   │   ├── common.py
│   │   ├── common_test.py
│   │   ├── encoders.py
│   │   ├── encoders_test.py
│   │   ├── postprocessors.py
│   │   └── postprocessors_test.py
│   ├── tasks/
│   │   ├── __init__.py
│   │   ├── image.py
│   │   ├── image_test.py
│   │   ├── structured_data.py
│   │   ├── structured_data_test.py
│   │   ├── text.py
│   │   └── text_test.py
│   ├── test_utils.py
│   ├── tuners/
│   │   ├── __init__.py
│   │   ├── bayesian_optimization.py
│   │   ├── greedy.py
│   │   ├── greedy_test.py
│   │   ├── hyperband.py
│   │   ├── hyperband_test.py
│   │   ├── random_search.py
│   │   ├── random_search_test.py
│   │   ├── task_specific.py
│   │   └── task_specific_test.py
│   └── utils/
│       ├── __init__.py
│       ├── data_utils.py
│       ├── data_utils_test.py
│       ├── io_utils.py
│       ├── layer_utils.py
│       ├── types.py
│       ├── utils.py
│       └── utils_test.py
├── benchmark/
│   ├── README.md
│   ├── __init__.py
│   ├── datasets/
│   │   ├── titanic_test.csv
│   │   └── titanic_train.csv
│   ├── experiments/
│   │   ├── __init__.py
│   │   ├── experiment.py
│   │   ├── image.py
│   │   ├── structured_data.py
│   │   └── text.py
│   ├── performance.py
│   └── run.py
├── codecov.yml
├── docker/
│   ├── Dockerfile
│   ├── Makefile
│   ├── README.md
│   ├── devel.Dockerfile
│   ├── pre-commit.Dockerfile
│   └── pre_commit.py
├── docs/
│   ├── README.md
│   ├── autogen.py
│   ├── ipynb/
│   │   ├── customized.ipynb
│   │   ├── export.ipynb
│   │   ├── image_classification.ipynb
│   │   ├── image_regression.ipynb
│   │   ├── multi.ipynb
│   │   ├── structured_data_classification.ipynb
│   │   ├── structured_data_regression.ipynb
│   │   ├── text_classification.ipynb
│   │   └── text_regression.ipynb
│   ├── keras_autodoc/
│   │   ├── __init__.py
│   │   ├── autogen.py
│   │   ├── docstring.py
│   │   ├── examples.py
│   │   ├── gathering_members.py
│   │   ├── get_signatures.py
│   │   └── utils.py
│   ├── mkdocs.yml
│   ├── py/
│   │   ├── customized.py
│   │   ├── export.py
│   │   ├── image_classification.py
│   │   ├── image_regression.py
│   │   ├── multi.py
│   │   ├── structured_data_classification.py
│   │   ├── structured_data_regression.py
│   │   ├── text_classification.py
│   │   └── text_regression.py
│   ├── requirements.txt
│   ├── run_py_files.sh
│   ├── templates/
│   │   ├── about.md
│   │   ├── index.md
│   │   ├── install.md
│   │   ├── stylesheets/
│   │   │   └── extra.css
│   │   └── tutorial/
│   │       ├── faq.md
│   │       └── overview.md
│   └── tutobooks.py
├── examples/
│   ├── automodel_with_cnn.py
│   ├── celeb_age.py
│   ├── cifar10.py
│   ├── imdb.py
│   ├── mnist.py
│   ├── new_pop.py
│   └── reuters.py
├── pyproject.toml
├── setup.cfg
└── shell/
    ├── contributors.py
    ├── contributors.sh
    ├── copyright.txt
    ├── cov.sh
    ├── coverage.sh
    ├── docs.sh
    ├── format.sh
    ├── generate_json.js
    ├── lint.sh
    ├── perf.sh
    ├── pre-commit.sh
    └── pypi.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .devcontainer/Dockerfile
================================================
# Dev container image, built on the VS Code dev-containers Python 3.10 base.
FROM mcr.microsoft.com/vscode/devcontainers/python:3.10
# Place the setup script at the image root; devcontainer.json invokes it
# through its postCreateCommand ("sh /setup.sh").
COPY setup.sh /setup.sh


================================================
FILE: .devcontainer/devcontainer.json
================================================
{
    "dockerFile": "Dockerfile",
    "postCreateCommand": "sh /setup.sh",
    "customizations": {
        "vscode": {
            "settings": {
                "python.linting.enabled": true,
                "python.linting.flake8Enabled": true,
                "python.linting.pylintEnabled": false,
                "python.testing.pytestEnabled": true,
                "editor.formatOnSave": true,
                "editor.codeActionsOnSave": {
                    "source.organizeImports": true
                },
                "[python]": {
                    "editor.defaultFormatter": "ms-python.black-formatter"
                },
                "editor.rulers": [80]
            },
            "extensions": [
                "ms-python.python",
                "ms-python.isort",
                "ms-python.flake8",
                "ms-python.black-formatter"
            ]
        }
    },
    "features": {
        "ghcr.io/devcontainers/features/github-cli:1": {}
    }
}

================================================
FILE: .devcontainer/setup.sh
================================================
# Dev container bootstrap: install AutoKeras in editable mode together with
# its test extras, then register the lint script as the git pre-commit hook.

# Upgrade pip first, then install the project from the current directory.
sudo pip install --upgrade pip
sudo pip install -e ".[tests]"

# Write a one-line hook that runs the linter, and make it executable.
hook_path=.git/hooks/pre-commit
printf '%s\n' "sh shell/lint.sh" > "$hook_path"
chmod a+x "$hook_path"

================================================
FILE: .dockerignore
================================================
# Keep VCS metadata and GitHub configuration out of the build context.
.git
.github
# Exclude the named Dockerfiles at any depth. A bare `*.Dockerfile` only
# matches files in the context root, so it misses the ones under docker/.
**/*.Dockerfile


================================================
FILE: .github/CODEOWNERS
================================================
# Default owners for every path in the repository.
* @haifeng-jin @fchollet


================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jin@tamu.edu. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]

[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/


================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing Guide

Contributions are welcome, and greatly appreciated!
This page is only a guide of the best practices of contributing code to AutoKeras.
The best way to contribute is to join our community by reading [this](https://autokeras.com/#contributing-code).
We will get you started right away.

Look for the [good first issue](https://github.com/keras-team/autokeras/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) label
to find issues suitable for beginners.

## Pull Request Guide

1. Is this the first pull request that you're making with GitHub? If so, read the guide [Making a pull request to an open-source project](https://github.com/gabrieldemarmiesse/getting_started_open_source).

2. Include "resolves #issue_number" in the description of the pull request if applicable. Briefly describe your contribution.

3. Submit the pull request from the first day of your development and create it as a [draft pull request](https://github.blog/2019-02-14-introducing-draft-pull-requests/). Click `ready for review` when it is finished and has passed all the checks.

4. For the case of bug fixes, add new test cases which would fail before your bug fix.


## Setup Environment
We offer 3 different options: **GitHub Codespaces**, **VS Code & Dev Containers**, and **the general setup**.
You can choose based on your preference.

### Option 1: GitHub Codespaces
You can simply open the repository in GitHub Codespaces.
The environment is already set up there.

### Option 2: VS Code & Dev Containers
Open VS Code.
Install the `Dev Containers` extension.
Press `F1` key. Enter `Dev Containers: Open Folder in Container...` to open the repository root folder.
The environment is already set up there.

### Option 3: The General Setup

Install [Virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).
Create a new virtualenv named `ak` based on python3.
```
mkvirtualenv -p python3 ak 
```
Please use this virtualenv for development.

Clone the repo. Go to the repo directory.
Run the following commands.
```
workon ak

pip install -e ".[tests]"
pip uninstall autokeras
add2virtualenv .
``` 

## Run Tests

### GitHub Codespaces or VS Code & Dev Containers
If you are using "GitHub Codespaces" or "VS Code & Dev Containers",
you can simply open any `*_test.py` file under the `autokeras` directory,
and after a few seconds you will see the test tab on the left of the window.

### General Setup

If you are using the general setup:

Activate the virtualenv.
Go to the repo directory.
Run the following lines to run the tests.

Run all the tests.
```
pytest autokeras
```

Run all the unit tests.
```
pytest autokeras --ignore=autokeras/integration_tests
```

Run all the integration tests.
```
pytest autokeras/integration_tests
```

## Code Style
You can run the following manually every time you want to format your code.
1. Run `shell/format.sh` to format your code.
2. Run `shell/lint.sh` to check.

## Docstrings
Docstrings should follow our style.
Just check the style of other docstrings in AutoKeras.


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
labels: ["bug report"]
title: "Bug: "

---

### Bug Description
<!---
A clear and concise description of what the bug is.
-->

### Bug Reproduction
Code for reproducing the bug:

Data used by the code:

### Expected Behavior
<!---
If not so obvious to see the bug from the running results,
please briefly describe the expected behavior.
-->

### Setup Details
Include the details about the versions of:
 - OS type and version:
 - Python:
 - autokeras: <!--- e.g. 0.4.0, 1.0.2, master-->
 - keras-tuner:
 - scikit-learn:
 - numpy:
 - pandas:
 - tensorflow:

### Additional context
<!---
If applicable, add any other context about the problem.
-->


================================================
FILE: .github/ISSUE_TEMPLATE/custom.md
================================================
---
name: Custom issue template
about: Describe this issue template's purpose here.

---


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
labels: ["feature request"]
title: "Feature: "

---

### Feature Description

### Code Example
<!---
Please provide a code example for using that feature
given the proposed feature is implemented.
-->
```python
```

### Reason
<!---
Why do we need the feature?
-->

### Solution
<!---
Please tell us how to implement the feature,
if you have one in mind.
-->


================================================
FILE: .github/ISSUE_TEMPLATE/new-task-template.md
================================================
---
name: New Task Template
about: Add a new machine learning task
labels: ["algorithm"]
title: "New Task: "

---

<!---
Please label your issue with `new_task_module`.
-->

### Suggested Name
<!---
Give a suggested name of the new task module, e.g. TextClassifier, ImageClassifier.
-->

### Task Description
<!---
A clear and concise description of the machine learning task to be added, its problem statement and learning outcome.
-->

### Evaluation Metrics
<!---
e.g. Mean Square Error, F1-score, Accuracy, AUC.
-->

### Benchmark Datasets
<!---
Any publicly available datasets you know of for the task;
please provide the name and link.
-->

### Reason
<!---
A clear and concise description of why this feature would be useful for the project.
-->

### Solution
<!---
A clear and concise description of what you want to happen.
-->

### Additional Context
<!---
Add any other context or screenshots about the feature request here.
-->


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!-- STEP 1: Give the pull request a meaningful title. -->
### Which issue(s) does this Pull Request fix?
<!-- STEP 2: Replace the "000" with the issue ID this pull request resolves. -->
resolves #000

### Details of the Pull Request
<!-- STEP 3: Add details/comments on the pull request. -->

<!-- STEP 4: If the pull request is in progress, click the down green arrow to select "Create Draft Pull Request", and click the button. If the pull request is ready to be reviewed, click "Create Pull Request" button directly. -->


================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration: daily update checks for the pip dependencies
# declared at the repository root.
version: 2
updates:
  - package-ecosystem: pip
    directory: "/"
    schedule:
      interval: daily
      time: "11:00"
    open-pull-requests-limit: 10
    # Version ranges Dependabot must not propose.
    ignore:
      - dependency-name: sphinx
        versions:
          - ">= 3.1.a, < 3.2"
      - dependency-name: typeguard
        versions:
          - ">= 2.11.a, < 2.12"
          - ">= 2.12.a, < 2.13"


================================================
FILE: .github/workflows/actions.yml
================================================
# CI workflow: runs the tests, the format check and the docs build, and
# publishes the package when a release is created (see the jobs below).
name: Tests

# Triggered by pushes to master, by every pull request, and by newly
# created releases.
on:
  push:
    branches: [ master ]
  pull_request:
  release:
    types: [created]
jobs:
  # Runs the test suite with coverage on the torch backend and uploads the
  # coverage report to Codecov.
  build:
    name: Run tests
    runs-on: ubuntu-latest
    env:
      KERAS_BACKEND: torch
    steps:
    # checkout@v4 / setup-python@v5 / cache@v4 replace the older majors,
    # which were built for the deprecated Node 16 action runtime.
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    - name: Get pip cache dir
      id: pip-cache
      run: |
        python -m pip install --upgrade pip setuptools
        echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
    - name: pip cache
      uses: actions/cache@v4
      with:
        path: ${{ steps.pip-cache.outputs.dir }}
        # The cache is keyed on pyproject.toml, so it is refreshed whenever
        # that file changes.
        key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
    - name: Install dependencies
      run: |
        pip install torch --index-url https://download.pytorch.org/whl/cpu
        pip install -e ".[tests]" --progress-bar off
    - name: Test with pytest
      run: |
        pytest --cov=autokeras --cov-report xml:coverage.xml
    # NOTE(review): codecov-action is intentionally left at v3 here; newer
    # majors may change upload behaviour - confirm before bumping.
    - name: Codecov
      uses: codecov/codecov-action@v3
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: unittests
        fail_ci_if_error: true
  # Checks code formatting by running the repository's pre-commit script.
  format:
    name: Check the code format
    runs-on: ubuntu-latest
    env:
      KERAS_BACKEND: torch
    steps:
      # checkout@v4 replaces v3, which was built for the deprecated Node 16
      # action runtime.
      - uses: actions/checkout@v4
      - name: Run pre-commit
        run: bash shell/pre-commit.sh
  # Verifies that the documentation can be generated and built with mkdocs.
  build-docs:
    name: Build the docs
    runs-on: ubuntu-latest
    env:
      KERAS_BACKEND: torch
    steps:
      # checkout@v4 / setup-python@v5 replace the older majors, which were
      # built for the deprecated Node 16 action runtime.
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools
          pip install torch --index-url https://download.pytorch.org/whl/cpu
          pip install -e .
          pip install -r docs/requirements.txt
      - name: Build the docs
        run: |
          cd docs
          python autogen.py
          mkdocs build
  # Builds the distribution and uploads it with twine. Runs only for
  # "release created" events, after the build, format and build-docs jobs.
  deploy:
    needs: [build, format, build-docs]
    if: github.event_name == 'release' && github.event.action == 'created'
    runs-on: ubuntu-latest
    env:
      KERAS_BACKEND: torch
    steps:
    # checkout@v4 / setup-python@v5 replace the older majors, which were
    # built for the deprecated Node 16 action runtime.
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip build twine
    - name: Build and publish
      env:
        # twine reads its credentials from these environment variables.
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
        python -m build
        twine upload dist/*


================================================
FILE: .gitignore
================================================
# vim swp files
*.swp
# caffe/pytorch model files
*.pth

# Mkdocs: generated sources and built site under docs/
/docs/sources
/docs/site

# OS, editor and IDE artifacts; pytest cache
.DS_Store
.vscode
settings.json
.idea
.pytest_cache
# Root-level experiments directory (anchored, so only /experiments matches)
/experiments

# resource temp folder: ignore its contents but keep the .gitkeep placeholder
tests/resources/temp/*
!tests/resources/temp/.gitkeep

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
cov.xml
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
.static_storage/
.media/
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

examples/text_cnn/glove_embedding/


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2020 The AutoKeras Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
<p align="center">
  <img width="500" alt="logo" src="https://autokeras.com/img/row_red.svg"/>
</p>

[![](https://github.com/keras-team/autokeras/workflows/Tests/badge.svg)](https://github.com/keras-team/autokeras/actions?query=workflow%3ATests+branch%3Amaster)
[![codecov](https://codecov.io/gh/keras-team/autokeras/branch/master/graph/badge.svg)](https://codecov.io/gh/keras-team/autokeras)
[![PyPI version](https://badge.fury.io/py/autokeras.svg)](https://badge.fury.io/py/autokeras)
[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/keras-team/autokeras/issues)

Official Website: [autokeras.com](https://autokeras.com)

##
AutoKeras: An AutoML system based on Keras.
It is developed by <a href="http://faculty.cs.tamu.edu/xiahu/index.html" target="_blank" rel="nofollow">DATA Lab</a> at Texas A&M University.
The goal of AutoKeras is to make machine learning accessible to everyone.

## Learning resources

* A short example.

```python
import autokeras as ak

clf = ak.ImageClassifier()
clf.fit(x_train, y_train)
results = clf.predict(x_test)
```

* [Official website tutorials](https://autokeras.com/tutorial/overview/).
* The book of [*Automated Machine Learning in Action*](https://www.manning.com/books/automated-machine-learning-in-action?query=automated&utm_source=jin&utm_medium=affiliate&utm_campaign=affiliate&a_aid=jin).
* The LiveProjects of [*Image Classification with AutoKeras*](https://www.manning.com/liveprojectseries/autokeras-ser).
<p align="center">
<a href="https://www.manning.com/books/automated-machine-learning-in-action?query=automated&utm_source=jin&utm_medium=affiliate&utm_campaign=affiliate&a_aid=jin"><img src="https://images.manning.com/360/480/resize/book/0/fc56aaf-b2ba-4ef4-85b3-4a31edbe8ecc/Song-AML-HI.png" alt="drawing" width="266"/></a>
&nbsp;
&nbsp;
<a href="https://www.manning.com/liveprojectseries/autokeras-ser"><img src="https://images.manning.com/360/480/resize/liveProjectSeries/9/38c715a-0c8c-4f66-b440-83d29993877a/ImageClassificationwithAutoKeras.jpg" alt="drawing" width="250"/></a>
</p>


## Installation

To install the package, please use the `pip` installation as follows:

```shell
pip3 install autokeras
```

Please follow the [installation guide](https://autokeras.com/install) for more details.

**Note:** Currently, AutoKeras is only compatible with **Python >= 3.7** and **TensorFlow >= 2.8.0**.

## Community

Ask your questions on our [GitHub Discussions](https://github.com/keras-team/autokeras/discussions).

## Contributing Code

Here is how we manage our project.

We pick the critical issues to work on from [GitHub issues](https://github.com/keras-team/autokeras/issues).
They will be added to this [Project](https://github.com/keras-team/autokeras/projects/3).
Some of the issues will then be added to the [milestones](https://github.com/keras-team/autokeras/milestones),
which are used to plan for the releases.

Refer to our [Contributing Guide](https://autokeras.com/contributing/) to learn the best practices.

Thanks to all the contributors!

[![The contributors](https://autokeras.com/img/contributors.svg)](https://github.com/keras-team/autokeras/graphs/contributors)

## Cite this work

Haifeng Jin, François Chollet, Qingquan Song, and Xia Hu. "AutoKeras: An AutoML Library for Deep Learning." *Journal of Machine Learning Research* 24.6 (2023): 1-6. ([Download](http://jmlr.org/papers/v24/20-1355.html))

Biblatex entry:

```bibtex
@article{JMLR:v24:20-1355,
  author  = {Haifeng Jin and François Chollet and Qingquan Song and Xia Hu},
  title   = {AutoKeras: An AutoML Library for Deep Learning},
  journal = {Journal of Machine Learning Research},
  year    = {2023},
  volume  = {24},
  number  = {6},
  pages   = {1--6},
  url     = {http://jmlr.org/papers/v24/20-1355.html}
}
```

## Acknowledgements

The authors gratefully acknowledge the D3M program of the Defense Advanced Research Projects Agency (DARPA) administered through AFRL contract FA8750-17-2-0116; the Texas A&M College of Engineering; and Texas A&M University.


================================================
FILE: RELEASE.md
================================================
# Release v2.0.0

## Breaking changes

* Requires `keras>=3.0.0` instead of `tf.keras`.
* Removed the structured data related tasks by removing the following public
  APIs:
  * `CategoricalToNumerical`
  * `MultiCategoryEncoding`
  * `StructuredDataInput`
  * `StructuredDataBlock`
  * `StructuredDataClassifier`
  * `StructuredDataRegressor`
* Removed the Time series related tasks by removing the following public APIs:
  * `TimeseriesInput`
  * `TimeseriesForecaster`
* Reduced search space of Text related tasks by removing the following blocks.
  * `Embedding`
  * `TextToIntSequence`
  * `TextToNgramVector`
  * `Transformer`

# Release v1.1.0

## Breaking changes

* This only affects you if you use `BertTokenizer` or `BertEncoder` in AutoKeras
  explicitly.  You are not affected if you only use `BertBlock`,
  `TextClassifier` or `TextRegressor`.  Removed the AutoKeras implementations of
  `BertTokenizer` and `BertEncoder`.  Use the `keras-nlp` implementations
  instead.

## New features

## Bug fixes
* Now also supports `numpy>=1.24`.


================================================
FILE: autokeras/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.auto_model import AutoModel
from autokeras.blocks import ClassificationHead
from autokeras.blocks import ConvBlock
from autokeras.blocks import DenseBlock
from autokeras.blocks import EfficientNetBlock
from autokeras.blocks import Embedding
from autokeras.blocks import Flatten
from autokeras.blocks import ImageAugmentation
from autokeras.blocks import ImageBlock
from autokeras.blocks import Merge
from autokeras.blocks import Normalization
from autokeras.blocks import RegressionHead
from autokeras.blocks import ResNetBlock
from autokeras.blocks import RNNBlock
from autokeras.blocks import SpatialReduction
from autokeras.blocks import StructuredDataBlock
from autokeras.blocks import TemporalReduction
from autokeras.blocks import TextBlock
from autokeras.blocks import XceptionBlock
from autokeras.engine.block import Block
from autokeras.engine.head import Head
from autokeras.engine.node import Node
from autokeras.keras_layers import CastToFloat32
from autokeras.keras_layers import ExpandLastDim
from autokeras.nodes import ImageInput
from autokeras.nodes import Input
from autokeras.nodes import StructuredDataInput
from autokeras.nodes import TextInput
from autokeras.tasks import ImageClassifier
from autokeras.tasks import ImageRegressor
from autokeras.tasks import StructuredDataClassifier
from autokeras.tasks import StructuredDataRegressor
from autokeras.tasks import TextClassifier
from autokeras.tasks import TextRegressor
from autokeras.tuners import BayesianOptimization
from autokeras.tuners import Greedy
from autokeras.tuners import Hyperband
from autokeras.tuners import RandomSearch

# Version string of the AutoKeras package.
__version__ = "3.0.0"

# The layer classes imported from `autokeras.keras_layers`, keyed by their
# class names.
# NOTE(review): presumably meant to be passed as `custom_objects` when loading
# a saved model -- confirm against the documentation.
CUSTOM_OBJECTS = {
    "CastToFloat32": CastToFloat32,
    "ExpandLastDim": ExpandLastDim,
}


================================================
FILE: autokeras/adapters/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.adapters.input_adapters import ImageAdapter
from autokeras.adapters.input_adapters import InputAdapter
from autokeras.adapters.input_adapters import StructuredDataAdapter
from autokeras.adapters.input_adapters import TextAdapter
from autokeras.adapters.output_adapters import ClassificationAdapter
from autokeras.adapters.output_adapters import RegressionAdapter


================================================
FILE: autokeras/adapters/input_adapters.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras.engine import adapter as adapter_module


class InputAdapter(adapter_module.Adapter):
    """Adapter for generic input data, which must be a numerical array."""

    def check(self, x):
        """Validate the data passed to `Input`.

        Raises a `TypeError` when `x` is not a `numpy.ndarray`, or when it is
        an array whose dtype is not a subtype of `numpy.number`.
        """
        if isinstance(x, np.ndarray):
            if np.issubdtype(x.dtype, np.number):
                return
            raise TypeError(
                "Expect the data to Input to be numerical, but got "
                "{type}.".format(type=x.dtype)
            )
        raise TypeError(
            "Expect the data to Input to be numpy.ndarray, "
            "but got {type}.".format(type=type(x))
        )


class ImageAdapter(adapter_module.Adapter):
    """Adapter for image data, which must be a numerical array."""

    def check(self, x):
        """Validate the data passed to `ImageInput`.

        Raises a `TypeError` when `x` is not a `numpy.ndarray`, or when it is
        an array whose dtype is not a subtype of `numpy.number`.
        """
        if isinstance(x, np.ndarray):
            if np.issubdtype(x.dtype, np.number):
                return
            raise TypeError(
                "Expect the data to ImageInput to be numerical, but got "
                "{type}.".format(type=x.dtype)
            )
        raise TypeError(
            "Expect the data to ImageInput to be numpy.ndarray, "
            "but got {type}.".format(type=type(x))
        )


class TextAdapter(adapter_module.Adapter):
    """Adapter for text data, which must be a `numpy.ndarray`."""

    def check(self, x):
        """Raise a `TypeError` unless `x` is a `numpy.ndarray`."""
        if isinstance(x, np.ndarray):
            return
        raise TypeError(
            "Expect the data to TextInput to be numpy.ndarray, "
            "but got {type}.".format(type=type(x))
        )


class StructuredDataAdapter(adapter_module.Adapter):
    """Adapter for structured data, which must be a `numpy.ndarray`."""

    def check(self, x):
        """Raise a `TypeError` unless `x` is a `numpy.ndarray`.

        The error message names the class of the adapter instance.
        """
        if isinstance(x, np.ndarray):
            return
        adapter_name = self.__class__.__name__
        raise TypeError(
            "Unsupported type {type} for "
            "{name}.".format(type=type(x), name=adapter_name)
        )


================================================
FILE: autokeras/adapters/input_adapters_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np
import pandas as pd
import pytest

from autokeras import test_utils
from autokeras.adapters import input_adapters
from autokeras.utils import data_utils


def test_image_input_adapter_transform_to_dataset():
    """`ImageAdapter.adapt` returns a `numpy.ndarray` for generated data."""
    images = test_utils.generate_data()
    adapted = input_adapters.ImageAdapter().adapt(images)
    assert isinstance(adapted, np.ndarray)


def test_image_input_unsupported_type():
    """`ImageAdapter.adapt` raises a `TypeError` for a plain string."""
    image_adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as exc_info:
        image_adapter.adapt("unknown")
    expected = "Expect the data to ImageInput to be numpy"
    assert expected in str(exc_info.value)


def test_image_input_numerical():
    """`ImageAdapter.adapt` raises a `TypeError` for an array of strings."""
    string_array = np.array([[["unknown"]]])
    image_adapter = input_adapters.ImageAdapter()
    with pytest.raises(TypeError) as exc_info:
        image_adapter.adapt(string_array)
    expected = "Expect the data to ImageInput to be numerical"
    assert expected in str(exc_info.value)


def test_input_type_error():
    """`InputAdapter.adapt` raises a `TypeError` for a plain string."""
    input_adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as exc_info:
        input_adapter.adapt("unknown")
    expected = "Expect the data to Input to be numpy"
    assert expected in str(exc_info.value)


def test_input_numerical():
    """`InputAdapter.adapt` raises a `TypeError` for an array of strings."""
    string_array = np.array([[["unknown"]]])
    input_adapter = input_adapters.InputAdapter()
    with pytest.raises(TypeError) as exc_info:
        input_adapter.adapt(string_array)
    expected = "Expect the data to Input to be numerical"
    assert expected in str(exc_info.value)


def test_text_adapt_np():
    """`TextAdapter.adapt` keeps a 1-D string array a 2-element ndarray."""
    texts = np.array(["a b c", "b b c"])
    adapted = input_adapters.TextAdapter().adapt(texts)

    assert data_utils.dataset_shape(adapted) == [2]
    assert isinstance(adapted, np.ndarray)


def test_text_input_type_error():
    """`TextAdapter.adapt` raises a `TypeError` for a plain string."""
    text_adapter = input_adapters.TextAdapter()
    with pytest.raises(TypeError) as exc_info:
        text_adapter.adapt("unknown")
    expected = "Expect the data to TextInput to be numpy"
    assert expected in str(exc_info.value)


def test_structured_data_input_unsupported_type_error():
    """`StructuredDataAdapter` raises a `TypeError` for a plain string."""
    with pytest.raises(TypeError) as exc_info:
        input_adapters.StructuredDataAdapter().adapt("unknown")

    message = str(exc_info.value)
    assert "Unsupported type" in message


def test_structured_data_input_transform_to_dataset():
    """`StructuredDataAdapter.adapt` returns an ndarray for CSV data."""
    frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    table = frame.to_numpy().astype(str)

    adapted = input_adapters.StructuredDataAdapter().adapt(table)

    assert isinstance(adapted, np.ndarray)


================================================
FILE: autokeras/adapters/output_adapters.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras.engine import adapter as adapter_module


class HeadAdapter(adapter_module.Adapter):
    """Base adapter for target data.

    Stores a `name`, which is used in the error message raised by `check`.
    """

    def __init__(self, name, **kwargs):
        super().__init__(**kwargs)
        self.name = name

    def check(self, dataset):
        """Raise a `TypeError` unless `dataset` is a `numpy.ndarray`."""
        if isinstance(dataset, np.ndarray):
            return
        raise TypeError(
            f"Expect the target data of {self.name} to be"
            f" np.ndarray, but got {type(dataset)}."
        )


class ClassificationAdapter(HeadAdapter):
    """Adapter for classification targets; behaves exactly as `HeadAdapter`."""


class RegressionAdapter(HeadAdapter):
    """Adapter for regression targets; behaves exactly as `HeadAdapter`."""


================================================
FILE: autokeras/adapters/output_adapters_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from autokeras.adapters import output_adapters


def test_unsupported_types_error():
    """`ClassificationAdapter.adapt` raises a `TypeError` for an integer."""
    head_adapter = output_adapters.ClassificationAdapter(name="a")

    with pytest.raises(TypeError) as exc_info:
        head_adapter.adapt(1)

    expected = "Expect the target data of a to be"
    assert expected in str(exc_info.value)


def test_reg_head_transform_1d_np():
    """`RegressionAdapter.adapt` returns an ndarray for a 1-D target."""
    head_adapter = output_adapters.RegressionAdapter(name="a")
    targets = np.random.rand(10)

    adapted = head_adapter.adapt(targets)

    assert isinstance(adapted, np.ndarray)


================================================
FILE: autokeras/analysers/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.analysers.input_analysers import CATEGORICAL
from autokeras.analysers.input_analysers import NUMERICAL
from autokeras.analysers.input_analysers import ImageAnalyser
from autokeras.analysers.input_analysers import InputAnalyser
from autokeras.analysers.input_analysers import StructuredDataAnalyser
from autokeras.analysers.input_analysers import TextAnalyser
from autokeras.analysers.output_analysers import ClassificationAnalyser
from autokeras.analysers.output_analysers import RegressionAnalyser


================================================
FILE: autokeras/analysers/input_analysers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np

from autokeras.engine import analyser

# The two column-type labels that `StructuredDataAnalyser.infer_column_types`
# assigns to the columns of structured data.
CATEGORICAL = "categorical"
NUMERICAL = "numerical"


class InputAnalyser(analyser.Analyser):
    """Base analyser for input data; its `finalize` performs no checks."""

    def finalize(self):
        """Do nothing and return `None`."""
        return None


class ImageAnalyser(InputAnalyser):
    """Analyser for image data; validates the number of dimensions."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def finalize(self):
        """Raise a `ValueError` unless `self.shape` has 3 or 4 entries."""
        if len(self.shape) in (3, 4):
            return
        raise ValueError(
            "Expect the data to ImageInput to have shape (batch_size, "
            "height, width, channels) or (batch_size, height, width) "
            "dimensions, but got input shape {shape}".format(
                shape=self.shape
            )
        )


class TextAnalyser(InputAnalyser):
    """Analyser for text data; validates the shape and the dtype."""

    def correct_shape(self):
        """Tell whether `self.shape` is 1-D, or 2-D with a second dim of 1."""
        rank = len(self.shape)
        return rank == 1 or (rank == 2 and self.shape[1] == 1)

    def finalize(self):
        """Validate the recorded shape and dtype.

        Raises a `ValueError` when `correct_shape` is false, and a
        `TypeError` when `self.dtype` does not equal `"string"`.
        """
        if not self.correct_shape():
            raise ValueError(
                "Expect the data to TextInput to have shape "
                "(batch_size, 1), but "
                "got input shape {shape}.".format(shape=self.shape)
            )
        is_string = self.dtype == "string"
        if not is_string:
            raise TypeError(
                "Expect the data to TextInput to be strings, but got "
                "{type}.".format(type=self.dtype)
            )


class StructuredDataAnalyser(InputAnalyser):
    """Analyser for structured data given as a 2-dimensional array.

    `finalize` validates the shape, fills in default column names and infers
    a type (`CATEGORICAL` or `NUMERICAL`) for every column whose type was
    not provided.

    column_names: Optional list with one name per column. When omitted, the
        stringified column indices are used.
    column_types: Optional dict mapping column names to column types.
        Entries provided here are never overwritten by the inferred types,
        and a provided dict is updated in place with the inferred ones.
        Requires `column_names` to be given as well.
    """

    def __init__(self, column_names=None, column_types=None, **kwargs):
        super().__init__(**kwargs)
        self.column_names = column_names
        self.column_types = column_types
        # Variables for inferring column types.
        self.count_numerical = None
        self.count_categorical = None
        self.count_unique_numerical = []
        self.num_col = None

    def update(self, data):
        """Collect per-column statistics from `data`.

        The statistics are reset on every call, so `data` is expected to hold
        all the data. Nothing is collected when the recorded shape is not
        2-dimensional; `check` reports that case in `finalize`.
        """
        super().update(data)
        # The super class set self.dtype to "string" based on the input numpy
        # array.  However, the preprocessor will encode it to float32.  So, set
        # self.dtype to "float32", which would be propagated to the Keras Input
        # node.
        self.dtype = "float32"
        if len(self.shape) != 2:
            return
        # data is a numpy array containing all the data.
        self.num_col = data.shape[1]
        self.count_numerical = np.zeros(self.num_col)
        self.count_categorical = np.zeros(self.num_col)
        self.count_unique_numerical = [{} for _ in range(self.num_col)]
        for i in range(self.num_col):
            self._update_column(data[:, i], i)

    def _update_column(self, column_data, i):
        """Count numerical, categorical and unique numerical values.

        A value counts as numerical when `float()` accepts it; every other
        value counts as categorical. The results are stored at index `i` of
        `count_numerical`, `count_categorical` and `count_unique_numerical`.
        """

        def is_numerical(x):
            try:
                float(x)
                return True
            except (ValueError, TypeError):
                return False

        # `otypes` is given explicitly because, without it, `np.vectorize`
        # must evaluate the first element to find the output dtype and raises
        # a ValueError for a column without any rows.
        check_numerical = np.vectorize(is_numerical, otypes=[bool])
        numerical_mask = check_numerical(column_data)
        num_numerical = np.sum(numerical_mask)
        self.count_numerical[i] = num_numerical
        self.count_categorical[i] = len(column_data) - num_numerical

        if np.any(numerical_mask):
            # Get numerical values
            numerical_values = column_data[numerical_mask]
            # Handle bytes
            numerical_values = np.array(
                [
                    x.decode("utf-8") if isinstance(x, bytes) else x
                    for x in numerical_values
                ]
            )
            numerical_floats = numerical_values.astype(float)
            unique_vals, counts = np.unique(
                numerical_floats, return_counts=True
            )
            self.count_unique_numerical[i] = dict(zip(unique_vals, counts))

    def finalize(self):
        """Validate the data, then infer the missing column types."""
        self.check()
        self.infer_column_types()

    def get_input_name(self):
        """Return the input name used in the error message of `check`."""
        return "StructuredDataInput"

    def check(self):
        """Validate the shape and the column names.

        Fills in `column_names` with the stringified column indices when no
        names were given.

        Raises a `ValueError` when the data is not 2-dimensional, when
        `column_types` is given without `column_names`, or when the number of
        column names differs from the number of columns.
        """
        if len(self.shape) != 2:
            raise ValueError(
                "Expect the data to {input_name} to have shape "
                "(batch_size, num_features), but "
                "got input shape {shape}.".format(
                    input_name=self.get_input_name(), shape=self.shape
                )
            )

        # Fill in the column_names
        if self.column_names is None:
            if self.column_types:
                raise ValueError(
                    "column_names must be specified, if "
                    "column_types is specified."
                )
            self.column_names = [str(index) for index in range(self.shape[1])]

        # Check if column_names has the correct length.
        if len(self.column_names) != self.shape[1]:
            raise ValueError(
                "Expect column_names to have length {expect} "
                "but got {actual}.".format(
                    expect=self.shape[1], actual=len(self.column_names)
                )
            )

    def infer_column_types(self):
        """Infer the type of every column and fill in `column_types`.

        A column is `CATEGORICAL` when it holds at least one non-numerical
        value, or when the ratio of unique numerical values to numerical
        values is below 0.05. Every other column is `NUMERICAL`. Types that
        are already present in `column_types` are kept.
        """
        column_types = {}

        for i in range(self.num_col):
            name = self.column_names[i]
            if self.count_categorical[i] > 0:
                column_types[name] = CATEGORICAL
            elif self.count_numerical[i] == 0:
                # The column holds no values at all.  Skip the ratio below,
                # which would divide zero by zero, and keep its outcome.
                column_types[name] = NUMERICAL
            elif (
                len(self.count_unique_numerical[i]) / self.count_numerical[i]
                < 0.05
            ):
                column_types[name] = CATEGORICAL
            else:
                column_types[name] = NUMERICAL

        # Partial column_types is provided.
        if self.column_types is None:
            self.column_types = {}
        for key, value in column_types.items():
            if key not in self.column_types:
                self.column_types[key] = value


================================================
FILE: autokeras/analysers/input_analysers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import copy

import numpy as np
import pandas as pd
import pytest

from autokeras import test_utils
from autokeras.analysers import input_analysers


def test_structured_data_input_less_col_name_error():
    """8 column names for 10 columns of data raise a `ValueError`."""
    too_few_names = list(range(8))
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=too_few_names
    )
    table = np.random.rand(20, 10)

    with pytest.raises(ValueError) as exc_info:
        structured_analyser.update(table)
        structured_analyser.finalize()

    expected = "Expect column_names to have length"
    assert expected in str(exc_info.value)


def test_structured_data_infer_col_types():
    """The inferred column types equal `test_utils.COLUMN_TYPES`."""
    features = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    features.pop("survived")
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )

    structured_analyser.update(features.values.astype(str))
    structured_analyser.finalize()

    assert structured_analyser.column_types == test_utils.COLUMN_TYPES


def test_dont_infer_specified_column_types():
    """A column type given by the caller is kept after `finalize`."""
    partial_types = copy.copy(test_utils.COLUMN_TYPES)
    del partial_types["sex"]
    partial_types["age"] = "categorical"

    features = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    features.pop("survived")
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=partial_types,
    )

    structured_analyser.update(features.values.astype(str))
    structured_analyser.finalize()

    assert structured_analyser.column_types["age"] == "categorical"


def test_structured_data_input_with_illegal_dim():
    """3-dimensional data raises a `ValueError` about the shape."""
    cube = np.random.rand(100, 32, 32)
    structured_analyser = input_analysers.StructuredDataAnalyser(
        column_names=test_utils.COLUMN_NAMES,
        column_types=None,
    )

    with pytest.raises(ValueError) as exc_info:
        structured_analyser.update(cube)
        structured_analyser.finalize()

    expected = "Expect the data to StructuredDataInput to have shape"
    assert expected in str(exc_info.value)


def test_image_input_analyser_shape_is_list_of_int():
    """After analysing 4-D data, `shape` is a list holding only ints."""
    images = np.random.rand(100, 32, 32, 3)
    image_analyser = input_analysers.ImageAnalyser()

    image_analyser.update(images)
    image_analyser.finalize()

    recorded_shape = image_analyser.shape
    assert isinstance(recorded_shape, list)
    assert all(isinstance(dim, int) for dim in recorded_shape)


def test_image_input_with_three_dim():
    """3-dimensional image data is accepted and keeps 3 dimensions."""
    images = np.random.rand(100, 32, 32)
    image_analyser = input_analysers.ImageAnalyser()

    image_analyser.update(images)
    image_analyser.finalize()

    assert len(image_analyser.shape) == 3


def test_image_input_with_illegal_dim():
    """2-dimensional image data raises a `ValueError` about the shape."""
    flat_images = np.random.rand(100, 32)
    image_analyser = input_analysers.ImageAnalyser()

    with pytest.raises(ValueError) as exc_info:
        image_analyser.update(flat_images)
        image_analyser.finalize()

    expected = "Expect the data to ImageInput to have shape"
    assert expected in str(exc_info.value)


def test_text_input_with_illegal_dim():
    """Text data of shape (100, 32) raises a `ValueError` about the shape."""
    wide_data = np.random.rand(100, 32)
    text_analyser = input_analysers.TextAnalyser()

    with pytest.raises(ValueError) as exc_info:
        text_analyser.update(wide_data)
        text_analyser.finalize()

    expected = "Expect the data to TextInput to have shape"
    assert expected in str(exc_info.value)


def test_text_analyzer_with_one_dim_doesnt_crash():
    """Analysing a 1-dimensional array of strings raises no error."""
    texts = np.array(["a b c", "b b c"])
    text_analyser = input_analysers.TextAnalyser()

    text_analyser.update(texts)
    text_analyser.finalize()


def test_text_illegal_type_error():
    """Float data of shape (100, 1) raises a `TypeError` about strings."""
    float_column = np.random.rand(100, 1)
    text_analyser = input_analysers.TextAnalyser()

    with pytest.raises(TypeError) as exc_info:
        text_analyser.update(float_column)
        text_analyser.finalize()

    expected = "Expect the data to TextInput to be strings"
    assert expected in str(exc_info.value)


================================================
FILE: autokeras/analysers/output_analysers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np

from autokeras.engine import analyser


class TargetAnalyser(analyser.Analyser):
    """Base analyser for target data that additionally stores a `name`."""

    def __init__(self, name=None, **kwargs):
        super().__init__(**kwargs)
        # Subclasses put this name into the error messages they raise.
        self.name = name


class ClassificationAnalyser(TargetAnalyser):
    """Analyser for classification targets.

    Collects the distinct labels of single-column targets, infers
    `num_classes` when it was not given, and validates the target shape.
    """

    def __init__(self, num_classes=None, multi_label=False, **kwargs):
        super().__init__(**kwargs)
        self.labels = set()
        self.label_encoder = None
        self.multi_label = multi_label
        self.num_classes = num_classes

    def update(self, data):
        """Record the unique values of `data` for single-column targets.

        Targets with more than one column contribute no labels. Raises a
        `ValueError` when the recorded shape has more than 2 dimensions.
        """
        super().update(data)
        rank = len(self.shape)
        if rank > 2:
            raise ValueError(
                "Expect the target data for {name} to have shape "
                "(batch_size, num_classes), "
                "but got {shape}.".format(name=self.name, shape=self.shape)
            )
        has_several_columns = rank > 1 and self.shape[1] > 1
        if not has_several_columns:
            self.labels = self.labels.union(set(np.unique(data)))

    def finalize(self):
        """Sort the labels, infer `num_classes` and validate the shape.

        Raises a `ValueError` when there are fewer than 2 classes, or when
        the targets are encoded and their shape differs from the expected
        one.
        """
        # TODO: support raw string labels for multi-label.
        self.labels = sorted(self.labels)

        # Infer the num_classes if not specified.
        if not self.num_classes:
            if not self.encoded:
                self.num_classes = len(self.labels)
            elif len(self.shape) == 1 or self.shape[1:] == [1]:
                # Single column with 0s and 1s.
                self.num_classes = 2
            else:
                self.num_classes = self.shape[1]

        if self.num_classes < 2:
            raise ValueError(
                "Expect the target data for {name} to have "
                "at least 2 classes, but got {num_classes}.".format(
                    name=self.name, num_classes=self.num_classes
                )
            )

        # Check shape equals expected shape.
        expected = self.get_expected_shape()
        trailing_dims = self.shape[1:]
        actual = [1] if len(trailing_dims) == 0 else trailing_dims
        if self.encoded and actual != expected:
            raise ValueError(
                "Expect the target data for {name} to have "
                "shape {expected}, but got {actual}.".format(
                    name=self.name, expected=expected, actual=trailing_dims
                )
            )

    def get_expected_shape(self):
        """Return the expected target shape without the batch dimension."""
        # Compute expected shape from num_classes.
        is_binary = self.num_classes == 2 and not self.multi_label
        return [1] if is_binary else [self.num_classes]

    @property
    def encoded(self):
        """Whether the targets are encoded for sigmoid or for softmax."""
        return self.encoded_for_sigmoid or self.encoded_for_softmax

    @property
    def encoded_for_sigmoid(self):
        """Whether exactly the two labels 0 and 1 were recorded."""
        return len(self.labels) == 2 and sorted(self.labels) == [0, 1]

    @property
    def encoded_for_softmax(self):
        """Whether the targets have more than one column."""
        return len(self.shape) > 1 and self.shape[1] > 1


class RegressionAnalyser(TargetAnalyser):
    """Target analyser that validates the output dimension for regression.

    # Arguments
        output_dim: Int or None. The expected size of the second dimension
            of the target. When falsy, no check is performed.
        **kwargs: Forwarded to the parent class constructor.
    """

    def __init__(self, output_dim=None, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def finalize(self):
        """Raise a ValueError if the data disagrees with `output_dim`."""
        # Nothing to validate when no output_dim was requested.
        if not self.output_dim:
            return
        if self.expected_dim() == self.output_dim:
            return
        mismatch = (
            "Expect the target data for {name} to have shape "
            "(batch_size, {output_dim}), "
            "but got {shape}."
        )
        raise ValueError(
            mismatch.format(
                name=self.name, output_dim=self.output_dim, shape=self.shape
            )
        )

    def expected_dim(self):
        """Return 1 for a 1-D shape, otherwise the second dimension."""
        return 1 if len(self.shape) == 1 else self.shape[1]


================================================
FILE: autokeras/analysers/output_analysers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from autokeras import test_utils
from autokeras.analysers import output_analysers


def test_clf_head_one_hot_shape_error():
    """Labels generated for 10 classes are rejected when 9 are declared."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=9
    )
    one_hot_labels = test_utils.generate_one_hot_labels(num_classes=10)

    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(one_hot_labels)
        clf_analyser.finalize()

    error_text = str(exc_info.value)
    assert "Expect the target data for a to have shape" in error_text


def test_clf_head_more_dim_error():
    """A 4-D target array is rejected with a shape error."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=9
    )
    four_dim_target = np.random.rand(100, 32, 32, 3)

    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(four_dim_target)
        clf_analyser.finalize()

    error_text = str(exc_info.value)
    assert "Expect the target data for a to have shape" in error_text


def test_wrong_num_classes_error():
    """A 3-column target is rejected when 5 classes are declared."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=5
    )
    three_column_target = np.random.rand(10, 3)

    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(three_column_target)
        clf_analyser.finalize()

    error_text = str(exc_info.value)
    assert "Expect the target data for a to have shape" in error_text


def test_one_class_error():
    """Targets containing a single distinct label are rejected."""
    clf_analyser = output_analysers.ClassificationAnalyser(name="a")
    single_label_target = np.array(["a", "a", "a"])

    with pytest.raises(ValueError) as exc_info:
        clf_analyser.update(single_label_target)
        clf_analyser.finalize()

    expected_fragment = (
        "Expect the target data for a to have at least 2 classes"
    )
    assert expected_fragment in str(exc_info.value)


def test_infer_ten_classes():
    """`num_classes` is inferred as 10 from labels generated for 10 classes."""
    clf_analyser = output_analysers.ClassificationAnalyser(name="a")
    one_hot_labels = test_utils.generate_one_hot_labels(num_classes=10)

    clf_analyser.update(one_hot_labels)
    clf_analyser.finalize()

    inferred = clf_analyser.num_classes
    assert inferred == 10


def test_infer_single_column_two_classes():
    """A 1-D target of random 0/1 integers is inferred to have 2 classes."""
    clf_analyser = output_analysers.ClassificationAnalyser(name="a")
    binary_target = np.random.randint(0, 2, 10)

    clf_analyser.update(binary_target)
    clf_analyser.finalize()

    inferred = clf_analyser.num_classes
    assert inferred == 2


def test_specify_five_classes():
    """A declared `num_classes=5` is kept for a 5-column target."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=5
    )
    five_column_target = np.random.rand(10, 5)

    clf_analyser.update(five_column_target)
    clf_analyser.finalize()

    declared = clf_analyser.num_classes
    assert declared == 5


def test_specify_two_classes_fit_single_column():
    """A declared `num_classes=2` accepts a single-column target."""
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", num_classes=2
    )
    single_column_target = np.random.rand(10, 1)

    clf_analyser.update(single_column_target)
    clf_analyser.finalize()

    declared = clf_analyser.num_classes
    assert declared == 2


def test_multi_label_two_classes_has_two_columns():
    """A 2-column multi-label target is reported as encoded."""
    two_column_target = np.random.rand(10, 2)
    clf_analyser = output_analysers.ClassificationAnalyser(
        name="a", multi_label=True
    )

    clf_analyser.update(two_column_target)
    clf_analyser.finalize()

    is_encoded = clf_analyser.encoded
    assert is_encoded


def test_reg_with_specified_output_dim_error():
    """A 2-column target is rejected when `output_dim=3` is declared."""
    reg_analyser = output_analysers.RegressionAnalyser(name="a", output_dim=3)
    two_column_target = np.random.rand(10, 2)

    with pytest.raises(ValueError) as exc_info:
        reg_analyser.update(two_column_target)
        reg_analyser.finalize()

    error_text = str(exc_info.value)
    assert "Expect the target data for a to have shape" in error_text


def test_reg_with_specified_output_dim_and_single_column_doesnt_crash():
    """`output_dim=1` with a single-column target finalizes without error."""
    reg_analyser = output_analysers.RegressionAnalyser(name="a", output_dim=1)
    single_column_target = np.random.rand(10, 1)

    reg_analyser.update(single_column_target)
    reg_analyser.finalize()


def test_regression_analyser_expected_dim_1d():
    """`expected_dim()` is 1 when the recorded shape is 1-D."""
    reg_analyser = output_analysers.RegressionAnalyser()
    one_dim_shape = [10]
    reg_analyser.shape = one_dim_shape
    result = reg_analyser.expected_dim()
    assert result == 1


def test_regression_analyser_expected_dim_2d():
    """`expected_dim()` is the second entry of a 2-D recorded shape."""
    reg_analyser = output_analysers.RegressionAnalyser()
    two_dim_shape = [10, 3]
    reg_analyser.shape = two_dim_shape
    result = reg_analyser.expected_dim()
    assert result == 3


================================================
FILE: autokeras/auto_model.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
from typing import List
from typing import Optional
from typing import Type
from typing import Union

import keras
import numpy as np
import tree

from autokeras import blocks
from autokeras import graph as graph_module
from autokeras import pipeline
from autokeras import tuners
from autokeras.engine import head as head_module
from autokeras.engine import node as node_module
from autokeras.engine import tuner
from autokeras.nodes import Input
from autokeras.utils import data_utils
from autokeras.utils import utils

# Maps the string names accepted by `get_tuner_class` to the tuner classes
# exposed by `autokeras.tuners`.
TUNER_CLASSES = {
    "bayesian": tuners.BayesianOptimization,
    "random": tuners.RandomSearch,
    "hyperband": tuners.Hyperband,
    "greedy": tuners.Greedy,
}


def get_tuner_class(tuner):
    """Return the tuner class registered under the given string name.

    # Arguments
        tuner: String. One of the keys of `TUNER_CLASSES`.

    # Returns
        The tuner class stored in `TUNER_CLASSES` for that name.

    # Raises
        ValueError: If `tuner` is not a string or not a known name.
    """
    is_known_name = isinstance(tuner, str) and tuner in TUNER_CLASSES
    if not is_known_name:
        message = (
            'Expected the tuner argument to be one of "greedy", '
            '"random", "hyperband", or "bayesian", '
            "but got {tuner}"
        )
        raise ValueError(message.format(tuner=tuner))
    return TUNER_CLASSES[tuner]


class AutoModel(object):
    """A Model defined by inputs and outputs.
    AutoModel combines a HyperModel and a Tuner to tune the HyperModel.
    The user can use it in a similar way to a Keras model since it
    also has `fit()` and  `predict()` methods.

    The AutoModel has two use cases. In the first case, the user only specifies
    the input nodes and output heads of the AutoModel. The AutoModel infers the
    rest of the model. In the second case, the user can specify the high-level
    architecture of the AutoModel by connecting the Blocks with the functional
    API, which is the same as the Keras
    [functional
    API](https://keras.io/api/models/model/#with-the-functional-api).

    # Example
    ```python
        # The user only specifies the input nodes and output heads.
        import autokeras as ak
        ak.AutoModel(
            inputs=[ak.ImageInput(), ak.TextInput()],
            outputs=[ak.ClassificationHead(), ak.RegressionHead()]
        )
    ```
    ```python
        # The user specifies the high-level architecture.
        import autokeras as ak
        image_input = ak.ImageInput()
        image_output = ak.ImageBlock()(image_input)
        text_input = ak.TextInput()
        text_output = ak.TextBlock()(text_input)
        output = ak.Merge()([image_output, text_output])
        classification_output = ak.ClassificationHead()(output)
        regression_output = ak.RegressionHead()(output)
        ak.AutoModel(
            inputs=[image_input, text_input],
            outputs=[classification_output, regression_output]
        )
    ```

    # Arguments
        inputs: A list of Node instances.
            The input node(s) of the AutoModel.
        outputs: A list of Node or Head instances.
            The output node(s) or head(s) of the AutoModel.
        project_name: String. The name of the AutoModel. Defaults to
            'auto_model'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. Defaults to 'greedy'.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by keras_tuner.Tuner.
    """

    def __init__(
        self,
        inputs: Union[Input, List[Input]],
        outputs: Union[head_module.Head, node_module.Node, list],
        project_name: str = "auto_model",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = "greedy",
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # The arguments are described in the class docstring.
        # Inputs and outputs may be single objects or nested structures;
        # they are stored as flat lists.
        self.inputs = tree.flatten(inputs)
        self.outputs = tree.flatten(outputs)
        self.seed = seed
        # Compare against None instead of relying on truthiness so that an
        # explicit `seed=0` also seeds NumPy's global random state.
        if seed is not None:
            np.random.seed(seed)
        # TODO: Support passing a tuner instance.
        # Initialize the hyper_graph.
        graph = self._build_graph()
        # A string is resolved to a tuner class; anything else is used as
        # the tuner class directly.
        if isinstance(tuner, str):
            tuner = get_tuner_class(tuner)
        self.tuner = tuner(
            hypermodel=graph,
            overwrite=overwrite,
            objective=objective,
            max_trials=max_trials,
            directory=directory,
            seed=self.seed,
            project_name=project_name,
            max_model_size=max_model_size,
            **kwargs
        )
        self.overwrite = overwrite
        # The head of each output node is the first block feeding into it.
        self._heads = [output_node.in_blocks[0] for output_node in self.outputs]

    @property
    def objective(self):
        """The `objective` attribute of the underlying tuner."""
        return self.tuner.objective

    @property
    def max_trials(self):
        """The `max_trials` attribute of the underlying tuner."""
        return self.tuner.max_trials

    @property
    def directory(self):
        """The `directory` attribute of the underlying tuner."""
        return self.tuner.directory

    @property
    def project_name(self):
        """The `project_name` attribute of the underlying tuner."""
        return self.tuner.project_name

    def _assemble(self):
        """Assemble the Blocks based on the input output nodes.

        Each input node is passed through the block returned by its own
        `get_block()`. The resulting nodes are merged when there is more than
        one, and every output is then called on the resulting node.

        # Returns
            A Graph built from the flattened inputs and the new output nodes.
        """
        inputs = tree.flatten(self.inputs)
        heads = tree.flatten(self.outputs)

        middle_nodes = []
        for input_node in inputs:
            block = input_node.get_block()
            middle_nodes.append(block(input_node))

        # Merge the middle nodes only when there are several of them.
        output_node = (
            blocks.Merge()(middle_nodes)
            if len(middle_nodes) > 1
            else middle_nodes[0]
        )

        head_outputs = [head(output_node) for head in heads]
        return graph_module.Graph(
            inputs=inputs, outputs=tree.flatten(head_outputs)
        )

    def _build_graph(self):
        """Build the hyper graph from `self.inputs` and `self.outputs`.

        If every output is a Node, the graph is built directly from the
        inputs and outputs (functional API). If every output is a Head, the
        graph is assembled automatically and `self.outputs` is replaced by
        the output nodes of the assembled graph (input/output API).

        # Returns
            The Graph instance.

        # Raises
            ValueError: If the outputs are neither all Node instances nor
                all Head instances.
        """
        # Using functional API.
        if all(
            isinstance(output, node_module.Node) for output in self.outputs
        ):
            return graph_module.Graph(
                inputs=self.inputs, outputs=self.outputs
            )

        # Using input/output API.
        if all(
            isinstance(output, head_module.Head) for output in self.outputs
        ):
            # Clear session to reset get_uid(). The names of the blocks will
            # start to count from 1 for new blocks in a new AutoModel
            # afterwards.  When initializing multiple AutoModel with Task API,
            # if not counting from 1 for each of the AutoModel, the predefined
            # hp values in task specific tuners would not match the names.
            keras.backend.clear_session()
            graph = self._assemble()
            self.outputs = graph.outputs
            keras.backend.clear_session()
            return graph

        # Mixed or unsupported outputs used to fall through and fail with an
        # UnboundLocalError; report the problem explicitly instead.
        raise ValueError(
            "Expected the outputs to be either all Node instances or all "
            "Head instances, but got {types}".format(
                types=[type(output) for output in self.outputs]
            )
        )

    def fit(
        self,
        x=None,
        y=None,
        batch_size=32,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        verbose=1,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The search selects the best model according to its performance on
        the validation data.

        # Arguments
            x: numpy.ndarray. Training data x.
            y: numpy.ndarray. Training data y.
            batch_size: Int. Number of samples per gradient update. Defaults to
                32.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, by default we train for a maximum of
                1000 epochs, but we stop training if the validation loss stops
                improving for 10 epochs (unless you specified an EarlyStopping
                callback as part of the callbacks argument, in which case the
                EarlyStopping callback you specified will determine early
                stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate the loss and any model
                metrics on this data at the end of each epoch.  The validation
                data is selected from the last samples in the `x` and `y` data
                provided, before shuffling. This argument is not supported when
                `x` is a dataset. The best model found would be fit on the
                entire dataset including the validation data.
            validation_data: Data on which to evaluate the loss and any model
                metrics at the end of each epoch. The model will not be trained
                on this data. `validation_data` will override
                `validation_split`. The type of the validation data should be
                the same as the training data. The best model found would be
                fit on the training dataset without the validation data.
            verbose: 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar,
                2 = one line per epoch. Note that the progress bar is not
                particularly useful when logged to a file, so verbose=2 is
                recommended when not running interactively (eg, in a production
                environment). Controls the verbosity of both KerasTuner search
                and
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method)
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well as
                validation loss values and validation metrics values (if
                applicable).

        # Raises
            ValueError: If neither `validation_data` nor a non-zero
                `validation_split` is provided.
        """
        # Some source of validation data is mandatory.
        if not (validation_data or validation_split):
            raise ValueError(
                "Either validation_data or a non-zero validation_split "
                "should be provided."
            )

        # Explicit validation data takes precedence over the split.
        if validation_data:
            validation_split = 0

        train_data, validation_data = self._check_and_adapt(
            x=x, y=y, validation_data=validation_data
        )
        self._analyze_data(train_data)
        self._build_hyper_pipeline()

        # Carve the validation set out of the training data if needed.
        if validation_data is None and validation_split:
            train_data, validation_data = data_utils.split_dataset(
                train_data, validation_split
            )

        x, y = train_data
        return self.tuner.search(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_split=validation_split,
            verbose=verbose,
            batch_size=batch_size,
            **kwargs
        )

    def _adapt(self, dataset, hms):
        """Adapt each array of `dataset` with the adapter of its node/head.

        The flattened arrays are paired positionally with `hms`.

        # Returns
            The single adapted array when there is exactly one, otherwise a
            tuple of the adapted arrays.
        """
        adapted = [
            hm.get_adapter().adapt(source)
            for source, hm in zip(tree.flatten(dataset), hms)
        ]
        return adapted[0] if len(adapted) == 1 else tuple(adapted)

    def _check_numpy_arrays(self, data, name, in_val=""):
        """Check if all elements in the nested structure are numpy arrays.

        # Raises
            ValueError: If any flattened element is not a numpy.ndarray.
        """
        elements = tree.flatten(data)
        if all(isinstance(element, np.ndarray) for element in elements):
            return
        message = "Expected " "{name}{in_val} to be a numpy array, got {type}"
        raise ValueError(
            message.format(
                name=name,
                in_val=in_val,
                type=[type(element) for element in elements],
            )
        )

    def _check_array_count(self, actual, expected, name, in_val):
        """Check if the number of arrays matches the expected count."""
        if actual != expected:
            raise ValueError(
                "Expected {name}{in_val} to have {expected} arrays, "
                "but got {actual}".format(
                    name=name,
                    in_val=in_val,
                    expected=expected,
                    actual=actual,
                )
            )

    def _check_data_format(self, x, y, validation=False, predict=False):
        """Check if the dataset has the same number of IOs with the model.

        Verifies that `x` (and `y` when given) contain only numpy arrays,
        that `x` has one array per input, and, unless predicting, that `y`
        has one array per output.
        """
        in_val = " in validation_data" if validation else ""

        self._check_numpy_arrays(x, "x", in_val)
        if y is not None:
            self._check_numpy_arrays(y, "y", in_val)

        num_x = len(tree.flatten(x))
        self._check_array_count(num_x, len(self.inputs), "x", in_val)

        # When predicting, y is not required.
        if predict or y is None:
            return
        num_y = len(tree.flatten(y))
        self._check_array_count(num_y, len(self.outputs), "y", in_val)

    def _analyze_data(self, dataset):
        """Run the analysers over the data and configure the nodes and heads.

        One analyser is obtained per input node and per head. Each is
        updated with its array, all are finalized, and then every node/head
        is configured from its analyser.
        """
        hms = self.inputs + self._heads
        analysers = [hm.get_analyser() for hm in hms]

        for array, analyser in zip(tree.flatten(dataset), analysers):
            analyser.update(array)

        for analyser in analysers:
            analyser.finalize()

        for hm, analyser in zip(hms, analysers):
            hm.config_from_analyser(analyser)

    def _build_hyper_pipeline(self):
        """Create the HyperPipeline and attach it to tuner and hypermodel."""
        input_preprocessors = [
            node.get_hyper_preprocessors() for node in self.inputs
        ]
        output_preprocessors = [
            head.get_hyper_preprocessors() for head in self._heads
        ]
        self.tuner.hyper_pipeline = pipeline.HyperPipeline(
            inputs=input_preprocessors, outputs=output_preprocessors
        )
        # The hypermodel receives the pipeline as stored on the tuner.
        self.tuner.hypermodel.hyper_pipeline = self.tuner.hyper_pipeline

    def _check_and_adapt(self, x, y, validation_data):
        """Validate and adapt the training and (optional) validation data.

        # Returns
            A tuple `((x, y), validation_data)` holding the adapted training
            data and the adapted validation data. `validation_data` is
            returned unchanged when it is falsy.
        """
        # Convert training data.
        self._check_data_format(x, y)
        train_data = (
            self._adapt(x, self.inputs),
            self._adapt(y, self._heads),
        )

        # Convert validation data
        if validation_data:
            self._check_data_format(*validation_data, validation=True)
            x_val, y_val = validation_data
            validation_data = (
                self._adapt(x_val, self.inputs),
                self._adapt(y_val, self._heads),
            )

        return train_data, validation_data

    def predict(self, x, batch_size=32, verbose=1, **kwargs):
        """Predict the output for a given testing data.

        The data is adapted, transformed by the best pipeline, fed to the
        best model, and the predictions are post-processed by the pipeline.

        # Arguments
            x: Any allowed types according to the input node. Testing data.
            batch_size: Number of samples per batch.
                If unspecified, batch_size will default to 32.
            verbose: Verbosity mode. 0 = silent, 1 = progress bar.
                Controls the verbosity of
                [keras.Model.predict](https://keras.io/api/models/model_training_apis/#predict-method)
            **kwargs: Any arguments supported by keras.Model.predict.

        # Returns
            A list of numpy.ndarray objects or a single numpy.ndarray.
            The predicted results.
        """
        self._check_data_format(x, None, predict=True)
        adapted_x = self._adapt(x, self.inputs)
        best_pipeline = self.tuner.get_best_pipeline()
        best_model = self.tuner.get_best_model()
        predictions = utils.predict_with_adaptive_batch_size(
            model=best_model,
            batch_size=batch_size,
            x=best_pipeline.transform_x(adapted_x),
            verbose=verbose,
            **kwargs
        )
        return best_pipeline.postprocess(predictions)

    def evaluate(self, x, y=None, batch_size=32, verbose=1, **kwargs):
        """Evaluate the best model for the given data.

        The data is adapted and transformed by the best pipeline before
        being passed to the best model.

        # Arguments
            x: Any allowed types according to the input node. Testing data.
            y: Any allowed types according to the head. Testing targets.
                Defaults to None.
            batch_size: Number of samples per batch.
                If unspecified, batch_size will default to 32.
            verbose: Verbosity mode. 0 = silent, 1 = progress bar.
                Controls the verbosity of
                [keras.Model.evaluate](https://keras.io/api/models/model_training_apis/#evaluate-method)
            **kwargs: Any arguments supported by keras.Model.evaluate.

        # Returns
            Scalar test loss (if the model has a single output and no metrics)
            or list of scalars (if the model has multiple outputs and/or
            metrics). The attribute model.metrics_names will give you the
            display labels for the scalar outputs.
        """
        self._check_data_format(x, y)
        adapted = (
            self._adapt(x, self.inputs),
            self._adapt(y, self._heads),
        )
        best_pipeline = self.tuner.get_best_pipeline()
        x, y = best_pipeline.transform(adapted)
        best_model = self.tuner.get_best_model()
        return utils.evaluate_with_adaptive_batch_size(
            model=best_model,
            batch_size=batch_size,
            x=x,
            y=y,
            verbose=verbose,
            **kwargs
        )

    def export_model(self):
        """Export the best Keras Model.

        Delegates to the tuner's `get_best_model()`.

        # Returns
            keras.Model instance. The best model found during the search, loaded
            with trained weights.
        """
        return self.tuner.get_best_model()


================================================
FILE: autokeras/auto_model_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import keras_tuner
import numpy as np
import pytest

import autokeras as ak
from autokeras import test_utils


def get_tuner_class(*args, **kwargs):
    """Return a mock tuner class whose best pipeline passes x through."""
    fake_pipeline = mock.Mock()
    # `transform_x` returns its argument unchanged.
    fake_pipeline.transform_x.side_effect = lambda x: x

    fake_tuner = mock.Mock()
    fake_tuner.get_best_pipeline.return_value = fake_pipeline

    fake_tuner_class = mock.Mock()
    fake_tuner_class.return_value = fake_tuner
    return fake_tuner_class


def test_auto_model_objective_is_kt_objective(tmp_path):
    """`AutoModel.objective` is a `keras_tuner.Objective` instance."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )

    objective = model.objective
    assert isinstance(objective, keras_tuner.Objective)


def test_auto_model_max_trial_field_as_specified(tmp_path):
    """`AutoModel.max_trials` reflects the constructor argument."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, max_trials=10
    )

    reported = model.max_trials
    assert reported == 10


def test_auto_model_directory_field_as_specified(tmp_path):
    """`AutoModel.directory` reflects the constructor argument."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )

    reported = model.directory
    assert reported == tmp_path


def test_auto_model_project_name_field_as_specified(tmp_path):
    """`AutoModel.project_name` reflects the constructor argument."""
    model = ak.AutoModel(
        ak.ImageInput(),
        ak.RegressionHead(),
        directory=tmp_path,
        project_name="auto_model",
    )

    reported = model.project_name
    assert reported == "auto_model"


@mock.patch("autokeras.auto_model.get_tuner_class")
def test_evaluate(tuner_fn, tmp_path):
    """`evaluate` runs on a functional-API model with a mocked tuner."""
    features = np.random.rand(100, 32)
    targets = np.random.rand(100, 1)

    # Build the graph with the functional API.
    input_node = ak.Input()
    hidden_node = ak.DenseBlock()(input_node)
    output_node = ak.RegressionHead()(hidden_node)

    model = ak.AutoModel(
        input_node, output_node, directory=tmp_path, max_trials=1
    )
    model.fit(features, targets, epochs=1, validation_data=(features, targets))

    # The mocked pipeline must hand back an (x, y) pair from `transform`.
    mocked_tuner = tuner_fn.return_value.return_value
    mocked_pipeline = mocked_tuner.get_best_pipeline.return_value
    mocked_pipeline.transform.return_value = (features, targets)

    model.evaluate(features, targets)
    assert tuner_fn.called


def get_single_io_auto_model(tmp_path):
    """Build an AutoModel with one image input and one regression head."""
    single_io_model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path, max_trials=2
    )
    return single_io_model


@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_auto_model_predict(tuner_fn, tmp_path):
    """`predict` runs after `fit` with a mocked tuner."""
    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)

    model = get_single_io_auto_model(tmp_path)
    model.fit(images, targets, epochs=2, validation_split=0.2)
    model.predict(images)

    assert tuner_fn.called


@mock.patch("autokeras.auto_model.get_tuner_class")
def test_final_fit_concat(tuner_fn, tmp_path):
    """With `validation_split`, `search` receives a truthy split value."""
    mocked_tuner = tuner_fn.return_value.return_value

    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)

    model = get_single_io_auto_model(tmp_path)
    model.fit(images, targets, epochs=2, validation_split=0.2)

    first_call_kwargs = mocked_tuner.search.call_args_list[0][1]
    assert first_call_kwargs["validation_split"]


@mock.patch("autokeras.auto_model.get_tuner_class")
def test_final_fit_not_concat(tuner_fn, tmp_path):
    """With `validation_data`, `search` receives a falsy split value."""
    mocked_tuner = tuner_fn.return_value.return_value

    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)

    model = get_single_io_auto_model(tmp_path)
    model.fit(images, targets, epochs=2, validation_data=(images, targets))

    first_call_kwargs = mocked_tuner.search.call_args_list[0][1]
    assert not first_call_kwargs["validation_split"]


@mock.patch("autokeras.auto_model.get_tuner_class")
def test_overwrite(tuner_fn, tmp_path):
    """The tuner class is constructed with a falsy `overwrite` by default."""
    mocked_tuner_class = tuner_fn.return_value

    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)

    model = get_single_io_auto_model(tmp_path)
    model.fit(images, targets, epochs=2, validation_data=(images, targets))

    constructor_kwargs = mocked_tuner_class.call_args_list[0][1]
    assert not constructor_kwargs["overwrite"]


@mock.patch("autokeras.auto_model.get_tuner_class")
def test_export_model(tuner_fn, tmp_path):
    """`export_model` asks the tuner for its best model."""
    mocked_tuner_class = tuner_fn.return_value
    mocked_tuner = mocked_tuner_class.return_value

    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)

    model = get_single_io_auto_model(tmp_path)
    model.fit(images, targets, epochs=2, validation_data=(images, targets))
    model.export_model()

    assert mocked_tuner.get_best_model.called


def get_multi_io_auto_model(tmp_path):
    """Build an AutoModel with two image inputs and two regression heads."""
    input_nodes = [ak.ImageInput(), ak.ImageInput()]
    output_heads = [ak.RegressionHead(), ak.RegressionHead()]
    return ak.AutoModel(
        input_nodes,
        output_heads,
        directory=tmp_path,
        max_trials=2,
        overwrite=False,
    )


def dataset_error(x, y, validation_data, message, tmp_path):
    """Assert that fitting the multi-IO model raises the given ValueError."""
    model = get_multi_io_auto_model(tmp_path)
    with pytest.raises(ValueError) as exc_info:
        model.fit(x, y, epochs=2, validation_data=validation_data)
    error_text = str(exc_info.value)
    assert message in error_text


@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_multi_input_predict(tuner_fn, tmp_path):
    """`predict` accepts the two inputs wrapped in an extra tuple."""
    model = get_multi_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))
    inputs_pair = (features, features)
    targets_pair = (targets, targets)

    model.fit(
        x=inputs_pair,
        y=targets_pair,
        epochs=2,
        validation_data=((features, features), (targets, targets)),
    )
    model.predict(x=((features, features),))


@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_multi_input_predict2(tuner_fn, tmp_path):
    """`predict` accepts the two inputs as a flat tuple."""
    model = get_multi_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))
    inputs_pair = (features, features)
    targets_pair = (targets, targets)

    model.fit(
        x=inputs_pair,
        y=targets_pair,
        epochs=2,
        validation_data=((features, features), (targets, targets)),
    )
    model.predict(x=(features, features))


def test_invalid_tuner_name_error(tmp_path):
    """An unknown tuner name makes the AutoModel constructor raise."""
    with pytest.raises(ValueError) as exc_info:
        ak.AutoModel(
            ak.ImageInput(),
            ak.RegressionHead(),
            directory=tmp_path,
            tuner="unknown",
        )

    error_text = str(exc_info.value)
    assert "Expected the tuner argument to be one of" in error_text


def test_no_validation_data_nor_split_error(tmp_path):
    """`fit` raises when neither validation data nor a split is given."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )
    with pytest.raises(ValueError) as exc_info:
        model.fit(
            x=np.random.rand(100, 32, 32, 3),
            y=np.random.rand(100, 1),
            validation_split=0,
        )

    error_text = str(exc_info.value)
    assert "Either validation_data or a non-zero" in error_text


@mock.patch("autokeras.auto_model.get_tuner_class", side_effect=get_tuner_class)
def test_predict_tuple_x_and_tuple_y_predict_doesnt_crash(tuner_fn, tmp_path):
    """`fit` and `predict` accept x and y given as one-element tuples."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )
    x_tuple = (np.random.rand(100, 32, 32, 3),)
    y_tuple = (np.random.rand(100, 1),)

    model.fit(x_tuple, y_tuple)
    model.predict(x_tuple)


def test_fit_with_non_numpy_data_raises_error(tmp_path):
    """`fit` rejects plain Python lists passed as x."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )
    # Nested lists rather than np.ndarray instances.
    list_x = [[[1, 2, 3]] * 32] * 32
    list_y = [1]

    with pytest.raises(ValueError, match="Expected x to be a numpy array"):
        model.fit(list_x, list_y)


def test_fit_with_wrong_array_count_raises_error(tmp_path):
    """Feeding two arrays to a single-input model is rejected by fit."""
    model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path
    )
    first_images = np.random.rand(10, 32, 32, 3)
    second_images = np.random.rand(10, 32, 32, 3)
    # Two arrays, while the model declares exactly one input.
    too_many_inputs = (first_images, second_images)
    targets = np.random.rand(10, 1)
    expected_message = "Expected x to have 1 arrays, but got 2"

    with pytest.raises(ValueError, match=expected_message):
        model.fit(too_many_inputs, targets)


================================================
FILE: autokeras/blocks/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras

from autokeras.blocks.basic import ConvBlock
from autokeras.blocks.basic import DenseBlock
from autokeras.blocks.basic import EfficientNetBlock
from autokeras.blocks.basic import Embedding
from autokeras.blocks.basic import ResNetBlock
from autokeras.blocks.basic import RNNBlock
from autokeras.blocks.basic import XceptionBlock
from autokeras.blocks.heads import ClassificationHead
from autokeras.blocks.heads import RegressionHead
from autokeras.blocks.preprocessing import ImageAugmentation
from autokeras.blocks.preprocessing import Normalization
from autokeras.blocks.reduction import Flatten
from autokeras.blocks.reduction import Merge
from autokeras.blocks.reduction import SpatialReduction
from autokeras.blocks.reduction import TemporalReduction
from autokeras.blocks.wrapper import GeneralBlock
from autokeras.blocks.wrapper import ImageBlock
from autokeras.blocks.wrapper import StructuredDataBlock
from autokeras.blocks.wrapper import TextBlock
from autokeras.utils import utils


def serialize(obj):
    """Serialize `obj` by delegating to `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized


def deserialize(config, custom_objects=None):
    """Deserialize `config` via `utils.deserialize_keras_object`.

    The names defined in this module are passed as `module_objects`, and
    `custom_objects` is forwarded unchanged.
    """
    # Must be evaluated here so it refers to this module's namespace.
    module_namespace = globals()
    return utils.deserialize_keras_object(
        config,
        module_objects=module_namespace,
        custom_objects=custom_objects,
    )


================================================
FILE: autokeras/blocks/basic.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional
from typing import Union

import keras
import tree
from keras import applications
from keras import layers
from keras_tuner.engine import hyperparameters

from autokeras.blocks import reduction
from autokeras.engine import block as block_module
from autokeras.utils import io_utils
from autokeras.utils import layer_utils
from autokeras.utils import utils

# Version name -> keras.applications constructor for the original ResNets.
RESNET_V1 = {
    "resnet50": applications.ResNet50,
    "resnet101": applications.ResNet101,
    "resnet152": applications.ResNet152,
}

# Version name -> keras.applications constructor for the ResNetV2 variants.
RESNET_V2 = {
    "resnet50_v2": applications.ResNet50V2,
    "resnet101_v2": applications.ResNet101V2,
    "resnet152_v2": applications.ResNet152V2,
}

# Version name -> keras.applications constructor for EfficientNet B0..B7.
EFFICIENT_VERSIONS = {
    "b0": applications.EfficientNetB0,
    "b1": applications.EfficientNetB1,
    "b2": applications.EfficientNetB2,
    "b3": applications.EfficientNetB3,
    "b4": applications.EfficientNetB4,
    "b5": applications.EfficientNetB5,
    "b6": applications.EfficientNetB6,
    "b7": applications.EfficientNetB7,
}

# Name of the boolean hyperparameter that toggles pretrained weights.
PRETRAINED = "pretrained"


@keras.utils.register_keras_serializable(package="autokeras")
class DenseBlock(block_module.Block):
    """Block for Dense layers.

    # Arguments
        num_layers: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of Dense layers in the block.
            If left unspecified, it will be tuned automatically.
        num_units: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of units in each dense layer.
            If left unspecified, it will be tuned automatically.
        use_batchnorm: Boolean. Whether to use BatchNormalization layers.
            If left unspecified, it will be tuned automatically.
        dropout: Float or keras_tuner.engine.hyperparameters.Choice.
            The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
        num_units: Optional[Union[int, hyperparameters.Choice]] = None,
        use_batchnorm: Optional[bool] = None,
        dropout: Optional[Union[float, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.num_layers = utils.get_hyperparameter(
            num_layers,
            hyperparameters.Choice("num_layers", [1, 2, 3], default=2),
            int,
        )
        self.num_units = utils.get_hyperparameter(
            num_units,
            hyperparameters.Choice(
                "num_units", [16, 32, 64, 128, 256, 512, 1024], default=32
            ),
            int,
        )
        # Kept as given; None means the choice is deferred to build().
        self.use_batchnorm = use_batchnorm
        self.dropout = utils.get_hyperparameter(
            dropout,
            hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0),
            float,
        )

    def get_config(self):
        """Return the block config with tunable arguments serialized."""
        config = super().get_config()
        config.update(
            {
                "num_layers": io_utils.serialize_block_arg(self.num_layers),
                "num_units": io_utils.serialize_block_arg(self.num_units),
                "use_batchnorm": self.use_batchnorm,
                "dropout": io_utils.serialize_block_arg(self.dropout),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a DenseBlock from a config produced by `get_config`."""
        config["num_layers"] = io_utils.deserialize_block_arg(
            config["num_layers"]
        )
        config["num_units"] = io_utils.deserialize_block_arg(
            config["num_units"]
        )
        config["dropout"] = io_utils.deserialize_block_arg(config["dropout"])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Stack the dense layers on top of a single input node.

        Each layer is Dense -> optional BatchNormalization -> ReLU ->
        optional Dropout, applied after flattening the input.

        # Arguments
            hp: HyperParameters. Used to resolve the tunable arguments.
            inputs: A structure holding exactly one input node.

        # Returns
            The output node of the last layer in the block.
        """
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node
        output_node = reduction.Flatten().build(hp, output_node)

        use_batchnorm = self.use_batchnorm
        if use_batchnorm is None:
            use_batchnorm = hp.Boolean("use_batchnorm", default=False)

        for i in range(utils.add_to_hp(self.num_layers, hp)):
            # The unit count is registered per layer under "units_{i}".
            units = utils.add_to_hp(self.num_units, hp, "units_{i}".format(i=i))
            output_node = layers.Dense(units)(output_node)
            if use_batchnorm:
                output_node = layers.BatchNormalization()(output_node)
            output_node = layers.ReLU()(output_node)
            # Resolve the dropout rate once and reuse it for this layer.
            dropout = utils.add_to_hp(self.dropout, hp)
            if dropout > 0:
                output_node = layers.Dropout(dropout)(output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class RNNBlock(block_module.Block):
    """An RNN Block.

    # Arguments
        return_sequences: Boolean. Whether the last RNN layer returns the
            full output sequence instead of only its last output.
            Defaults to False.
        bidirectional: Boolean or keras_tuner.engine.hyperparameters.Boolean.
            Whether to wrap each RNN layer in a Bidirectional layer.
            If left unspecified, it will be tuned automatically.
        num_layers: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of RNN layers. If left unspecified, it will be
            tuned automatically.
        layer_type: String or keras_tuner.engine.hyperparameters.Choice.
            'gru' or 'lstm'. If left unspecified, it will be tuned
            automatically.
    """

    def __init__(
        self,
        return_sequences: bool = False,
        bidirectional: Optional[Union[bool, hyperparameters.Boolean]] = None,
        num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
        layer_type: Optional[Union[str, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences

        default_bidirectional = hyperparameters.Boolean(
            "bidirectional", default=True
        )
        self.bidirectional = utils.get_hyperparameter(
            bidirectional, default_bidirectional, bool
        )

        default_num_layers = hyperparameters.Choice(
            "num_layers", [1, 2, 3], default=2
        )
        self.num_layers = utils.get_hyperparameter(
            num_layers, default_num_layers, int
        )

        default_layer_type = hyperparameters.Choice(
            "layer_type", ["gru", "lstm"], default="lstm"
        )
        self.layer_type = utils.get_hyperparameter(
            layer_type, default_layer_type, str
        )

    def get_config(self):
        """Return the block config with tunable arguments serialized."""
        config = super().get_config()
        serialized = {
            "return_sequences": self.return_sequences,
            "bidirectional": io_utils.serialize_block_arg(self.bidirectional),
            "num_layers": io_utils.serialize_block_arg(self.num_layers),
            "layer_type": io_utils.serialize_block_arg(self.layer_type),
        }
        config.update(serialized)
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild an RNNBlock from a config produced by `get_config`."""
        for key in ("bidirectional", "num_layers", "layer_type"):
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Stack the RNN layers on top of a single rank-3 input node.

        # Arguments
            hp: HyperParameters. Used to resolve the tunable arguments.
            inputs: A structure holding exactly one input node.

        # Returns
            The output node of the last RNN layer.

        # Raises
            ValueError: If the input node does not have three dimensions.
        """
        flat_inputs = tree.flatten(inputs)
        utils.validate_num_inputs(flat_inputs, 1)
        input_node = flat_inputs[0]
        input_shape = list(input_node.shape)
        if len(input_shape) != 3:
            raise ValueError(
                "Expect the input tensor of RNNBlock to have dimensions of "
                "[batch_size, time_steps, vec_len], "
                "but got {shape}".format(shape=input_node.shape)
            )

        # Every RNN layer keeps the width of the incoming feature vector.
        units = input_shape[-1]

        use_bidirectional = utils.add_to_hp(self.bidirectional, hp)
        chosen_type = utils.add_to_hp(self.layer_type, hp)
        depth = utils.add_to_hp(self.num_layers, hp)
        rnn_class = {"gru": layers.GRU, "lstm": layers.LSTM}[chosen_type]

        output_node = input_node
        for index in range(depth):
            # Intermediate layers always emit sequences so that the next
            # layer can consume them; only the last one follows the setting.
            is_last = index == depth - 1
            return_sequences = self.return_sequences if is_last else True
            rnn_layer = rnn_class(units, return_sequences=return_sequences)
            if use_bidirectional:
                rnn_layer = layers.Bidirectional(rnn_layer)  # pragma: no cover
            output_node = rnn_layer(output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class ConvBlock(block_module.Block):
    """Block for vanilla ConvNets.

    # Arguments
        kernel_size: Int or keras_tuner.engine.hyperparameters.Choice.
            The size of the kernel.
            If left unspecified, it will be tuned automatically.
        num_blocks: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of conv blocks, each of which may contain
            convolutional, max pooling, dropout, and activation. If left
            unspecified, it will be tuned automatically.
        num_layers: Int or keras_tuner.engine.hyperparameters.Choice.
            The number of convolutional layers in each block. If left
            unspecified, it will be tuned automatically.
        filters: Int or keras_tuner.engine.hyperparameters.Choice. The number of
            filters in the convolutional layers. If left unspecified, it will
            be tuned automatically.
        max_pooling: Boolean. Whether to use max pooling layer in each block. If
            left unspecified, it will be tuned automatically.
        separable: Boolean. Whether to use separable conv layers.
            If left unspecified, it will be tuned automatically.
        dropout: Float or keras_tuner.engine.hyperparameters.Choice.
            Between 0 and 1. The dropout rate after convolutional layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        kernel_size: Optional[Union[int, hyperparameters.Choice]] = None,
        num_blocks: Optional[Union[int, hyperparameters.Choice]] = None,
        num_layers: Optional[Union[int, hyperparameters.Choice]] = None,
        filters: Optional[Union[int, hyperparameters.Choice]] = None,
        max_pooling: Optional[bool] = None,
        separable: Optional[bool] = None,
        dropout: Optional[Union[float, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.kernel_size = utils.get_hyperparameter(
            kernel_size,
            hyperparameters.Choice("kernel_size", [3, 5, 7], default=3),
            int,
        )
        self.num_blocks = utils.get_hyperparameter(
            num_blocks,
            hyperparameters.Choice("num_blocks", [1, 2, 3], default=2),
            int,
        )
        self.num_layers = utils.get_hyperparameter(
            num_layers,
            hyperparameters.Choice("num_layers", [1, 2], default=2),
            int,
        )
        self.filters = utils.get_hyperparameter(
            filters,
            hyperparameters.Choice(
                "filters", [16, 32, 64, 128, 256, 512], default=32
            ),
            int,
        )
        # Kept as given; None means the choice is deferred to build().
        self.max_pooling = max_pooling
        self.separable = separable
        self.dropout = utils.get_hyperparameter(
            dropout,
            hyperparameters.Choice("dropout", [0.0, 0.25, 0.5], default=0.0),
            float,
        )

    def get_config(self):
        """Return the block config with tunable arguments serialized."""
        config = super().get_config()
        config.update(
            {
                "kernel_size": io_utils.serialize_block_arg(self.kernel_size),
                "num_blocks": io_utils.serialize_block_arg(self.num_blocks),
                "num_layers": io_utils.serialize_block_arg(self.num_layers),
                "filters": io_utils.serialize_block_arg(self.filters),
                "max_pooling": self.max_pooling,
                "separable": self.separable,
                "dropout": io_utils.serialize_block_arg(self.dropout),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a ConvBlock from a config produced by `get_config`."""
        config["kernel_size"] = io_utils.deserialize_block_arg(
            config["kernel_size"]
        )
        config["num_blocks"] = io_utils.deserialize_block_arg(
            config["num_blocks"]
        )
        config["num_layers"] = io_utils.deserialize_block_arg(
            config["num_layers"]
        )
        config["filters"] = io_utils.deserialize_block_arg(config["filters"])
        config["dropout"] = io_utils.deserialize_block_arg(config["dropout"])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Stack the convolutional blocks on top of a single input node.

        Each block is `num_layers` ReLU convolutions, followed by an
        optional max pooling layer and an optional Dropout layer.

        # Arguments
            hp: HyperParameters. Used to resolve the tunable arguments.
            inputs: A structure holding exactly one input node.

        # Returns
            The output node of the last block.
        """
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node

        kernel_size = utils.add_to_hp(self.kernel_size, hp)

        separable = self.separable
        if separable is None:
            separable = hp.Boolean("separable", default=False)

        # The layer classes are selected from the shape of the input node.
        if separable:
            conv = layer_utils.get_sep_conv(
                input_node.shape
            )  # pragma: no cover
        else:
            conv = layer_utils.get_conv(input_node.shape)

        max_pooling = self.max_pooling
        if max_pooling is None:
            max_pooling = hp.Boolean("max_pooling", default=True)
        pool = layer_utils.get_max_pooling(input_node.shape)

        for i in range(utils.add_to_hp(self.num_blocks, hp)):
            for j in range(utils.add_to_hp(self.num_layers, hp)):
                # Filters are registered per layer under "filters_{i}_{j}".
                output_node = conv(
                    utils.add_to_hp(
                        self.filters, hp, "filters_{i}_{j}".format(i=i, j=j)
                    ),
                    kernel_size,
                    padding=self._get_padding(kernel_size, output_node),
                    activation="relu",
                )(output_node)
            if max_pooling:
                output_node = pool(
                    kernel_size - 1,
                    padding=self._get_padding(kernel_size - 1, output_node),
                )(output_node)
            # Resolve the dropout rate once and reuse it for this block.
            dropout = utils.add_to_hp(self.dropout, hp)
            if dropout > 0:
                output_node = layers.Dropout(dropout)(output_node)
        return output_node

    @staticmethod
    def _get_padding(kernel_size, output_node):
        """Pick the padding mode for a layer applied to `output_node`.

        Returns "valid" when every dimension between the first and the last
        axis of the node's shape is at least twice `kernel_size`, and
        "same" otherwise.
        """
        if all(kernel_size * 2 <= length for length in output_node.shape[1:-1]):
            return "valid"
        return "same"


@keras.utils.register_keras_serializable(package="autokeras")
class KerasApplicationBlock(block_module.Block):
    """Blocks extending Keras applications.

    Shared base for blocks that wrap a model constructor from
    `keras.applications`.

    # Arguments
        pretrained: Boolean or None. Whether to use ImageNet pretrained
            weights. If None, it will be tuned automatically.
        models: Dict mapping a version name to a model constructor. When it
            holds more than one entry, the version is tuned automatically.
        min_size: Int. Minimum height and width of the input; smaller
            inputs are resized up to it.
    """

    def __init__(self, pretrained, models, min_size, **kwargs):
        super().__init__(**kwargs)
        self.pretrained = pretrained
        self.models = models
        self.min_size = min_size

    def get_config(self):
        """Return the block config, extended with `pretrained`.

        `models` and `min_size` are not part of the config.
        """
        config = super().get_config()
        config.update({"pretrained": self.pretrained})
        return config

    def build(self, hp, inputs=None):
        """Apply the selected application model to the first input node.

        # Arguments
            hp: HyperParameters. Used to resolve the tunable arguments.
            inputs: A structure whose first node is the image input.

        # Returns
            The output node of the application model.

        # Raises
            ValueError: If `pretrained` is True and the input has a number
                of channels other than 1 or 3.
        """
        input_node = tree.flatten(inputs)[0]

        pretrained = self.pretrained
        # Pretrained weights are only allowed for 1- or 3-channel inputs.
        if input_node.shape[3] not in [1, 3]:
            if self.pretrained:
                raise ValueError(
                    "When pretrained is set to True, expect input to "
                    "have 1 or 3 channels, but got "
                    "{channels}.".format(channels=input_node.shape[3])
                )
            pretrained = False
        if pretrained is None:
            pretrained = hp.Boolean(PRETRAINED, default=False)
            if pretrained:
                # "trainable" only exists when "pretrained" is tuned to True.
                with hp.conditional_scope(PRETRAINED, [True]):
                    trainable = hp.Boolean("trainable", default=False)
        elif pretrained:
            trainable = hp.Boolean("trainable", default=False)

        if len(self.models) > 1:
            version = hp.Choice("version", list(self.models.keys()))
        else:
            version = list(self.models.keys())[0]

        min_size = self.min_size
        if hp.Boolean("imagenet_size", default=False):
            min_size = 224
        # Upscale any side that is smaller than the required minimum.
        if input_node.shape[1] < min_size or input_node.shape[2] < min_size:
            input_node = layers.Resizing(
                max(min_size, input_node.shape[1]),
                max(min_size, input_node.shape[2]),
            )(input_node)
        # Bring the input to 3 channels: repeat a single channel three
        # times, or project any other channel count with a 1x1 convolution.
        if input_node.shape[3] == 1:
            input_node = layers.Concatenate()([input_node] * 3)
        if input_node.shape[3] != 3:
            input_node = layers.Conv2D(
                filters=3, kernel_size=1, padding="same"
            )(input_node)

        if pretrained:
            model = self.models[version](weights="imagenet", include_top=False)
            model.trainable = trainable
        else:
            model = self.models[version](
                weights=None,
                include_top=False,
                input_shape=input_node.shape[1:],
            )

        return model(input_node)


@keras.utils.register_keras_serializable(package="autokeras")
class ResNetBlock(KerasApplicationBlock):
    """Block for ResNet.

    # Arguments
        version: String. Either 'v1' or 'v2', selecting the family of
            ResNet models to choose from. If left unspecified, models from
            both families are candidates for tuning.
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        version: Optional[str] = None,
        pretrained: Optional[bool] = None,
        **kwargs,
    ):
        # Reject anything other than None, "v1" or "v2" up front.
        if not (version is None or version == "v1" or version == "v2"):
            raise ValueError(
                'Expect version to be "v1", or "v2", but got '
                "{version}.".format(version=version)
            )

        if version is None:
            candidates = {**RESNET_V1, **RESNET_V2}
        elif version == "v1":
            candidates = RESNET_V1
        else:
            candidates = RESNET_V2

        super().__init__(
            pretrained=pretrained, models=candidates, min_size=32, **kwargs
        )
        self.version = version

    def get_config(self):
        """Return the parent config extended with `version`."""
        config = super().get_config()
        config["version"] = self.version
        return config


@keras.utils.register_keras_serializable(package="autokeras")
class XceptionBlock(KerasApplicationBlock):
    """Block for XceptionNet.

    Wraps `keras.applications.Xception` as the only candidate model, with
    a minimum input size of 71.

    The original Xception architecture is from
    [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357).
    The data first goes through the entry flow, then through the middle flow
    which is repeated eight times, and finally through the exit flow.

    # Arguments
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, pretrained: Optional[bool] = None, **kwargs):
        xception_models = {"xception": applications.Xception}
        super().__init__(
            pretrained=pretrained,
            models=xception_models,
            min_size=71,
            **kwargs,
        )


@keras.utils.register_keras_serializable(package="autokeras")
class EfficientNetBlock(KerasApplicationBlock):
    """Block for EfficientNet.

    # Arguments
        version: String. The value should be one of 'b0', 'b1', ..., 'b7'. The
            type of EfficientNet to use. If left unspecified, it will be tuned
            automatically.
        pretrained: Boolean. Whether to use ImageNet pretrained weights.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        version: Optional[str] = None,
        pretrained: Optional[bool] = None,
        **kwargs,
    ):
        if version is None:
            models = EFFICIENT_VERSIONS
        elif version in EFFICIENT_VERSIONS.keys():
            # A pinned version leaves a single candidate, so nothing is tuned.
            models = {version: EFFICIENT_VERSIONS[version]}
        else:
            raise ValueError(
                "Expect version to be in {expect}, but got "
                "{version}.".format(
                    expect=list(EFFICIENT_VERSIONS.keys()), version=version
                )
            )
        super().__init__(
            pretrained=pretrained,
            models=models,
            min_size=32,
            **kwargs,
        )
        self.version = version

    def get_config(self):
        """Return the parent config extended with `version`.

        Without this entry a block created with a pinned version would be
        restored with `version=None` after a serialization round trip.
        """
        config = super().get_config()
        config.update({"version": self.version})
        return config


@keras.utils.register_keras_serializable(package="autokeras")
class Embedding(block_module.Block):
    """Word embedding block for sequences.

    The input should be tokenized sequences with the same length, where each
    element of a sequence should be the index of the word.

    # Arguments
        max_features: Int. Size of the vocabulary. Must be set if not using
            TextToIntSequence before this block. Defaults to 20001.
        embedding_dim: Int or keras_tuner.engine.hyperparameters.Choice.
            Output dimension of the embedding layer.
            If left unspecified, it will be tuned automatically.
        dropout: Float or keras_tuner.engine.hyperparameters.Choice.
            The dropout rate applied after the embedding layer.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        max_features: int = 20001,
        embedding_dim: Optional[Union[int, hyperparameters.Choice]] = None,
        dropout: Optional[Union[float, hyperparameters.Choice]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.max_features = max_features

        default_embedding_dim = hyperparameters.Choice(
            "embedding_dim", [32, 64, 128, 256, 512], default=128
        )
        self.embedding_dim = utils.get_hyperparameter(
            embedding_dim, default_embedding_dim, int
        )

        default_dropout = hyperparameters.Choice(
            "dropout", [0.0, 0.25, 0.5], default=0.25
        )
        self.dropout = utils.get_hyperparameter(dropout, default_dropout, float)

    def get_config(self):
        """Return the block config with tunable arguments serialized."""
        config = super().get_config()
        serialized = {
            "max_features": self.max_features,
            "embedding_dim": io_utils.serialize_block_arg(self.embedding_dim),
            "dropout": io_utils.serialize_block_arg(self.dropout),
        }
        config.update(serialized)
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild an Embedding block from a config made by `get_config`."""
        for key in ("dropout", "embedding_dim"):
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)

    def build(self, hp, inputs=None):
        """Embed the first input node and optionally apply dropout.

        # Arguments
            hp: HyperParameters. Used to resolve the tunable arguments.
            inputs: A structure whose first node holds the token indices.

        # Returns
            The output node of the embedding, after dropout when the
            resolved rate is greater than zero.
        """
        token_ids = tree.flatten(inputs)[0]
        output_dim = utils.add_to_hp(self.embedding_dim, hp)
        embedding_layer = layers.Embedding(
            input_dim=self.max_features, output_dim=output_dim
        )
        embedded = embedding_layer(token_ids)
        rate = utils.add_to_hp(self.dropout, hp)
        if rate > 0:
            return layers.Dropout(rate)(embedded)
        return embedded


================================================
FILE: autokeras/blocks/basic_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import pytest
import tree

from autokeras import blocks
from autokeras import test_utils


def test_resnet_build_return_tensor():
    """A default ResNetBlock builds a single output on a 32x32x3 input."""
    resnet = blocks.ResNetBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = resnet.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_resnet_v1_return_tensor():
    """A ResNetBlock limited to version "v1" builds a single output."""
    resnet_v1 = blocks.ResNetBlock(version="v1")
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = resnet_v1.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_efficientnet_b0_return_tensor():
    """A non-pretrained EfficientNet "b0" block builds a single output."""
    efficientnet = blocks.EfficientNetBlock(version="b0", pretrained=False)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = efficientnet.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_resnet_pretrained_build_return_tensor():
    """A pretrained ResNetBlock builds a single output on a 3-channel input."""
    resnet = blocks.ResNetBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = resnet.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_resnet_pretrained_with_one_channel_input():
    """A pretrained ResNetBlock accepts a single-channel 28x28 input."""
    resnet = blocks.ResNetBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    grayscale_input = keras.Input(shape=(28, 28, 1), dtype="float32")

    built = resnet.build(hp, grayscale_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_resnet_pretrained_error_with_two_channels():
    """A pretrained ResNetBlock refuses to build on a 2-channel input."""
    resnet = blocks.ResNetBlock(pretrained=True)
    expected_message = "When pretrained is set to True"

    with pytest.raises(ValueError, match=expected_message):
        resnet.build(
            keras_tuner.HyperParameters(),
            keras.Input(shape=(224, 224, 2), dtype="float32"),
        )


def test_resnet_deserialize_to_resnet():
    """Serializing then deserializing a ResNetBlock gives a ResNetBlock."""
    serialized = blocks.serialize(blocks.ResNetBlock())

    restored = blocks.deserialize(serialized)

    assert isinstance(restored, blocks.ResNetBlock)


def test_resnet_get_config_has_all_attributes():
    """Names from get_func_args(ResNetBlock.__init__) are all config keys."""
    config_keys = blocks.ResNetBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ResNetBlock.__init__)

    assert init_args.issubset(config_keys)


def test_resnet_wrong_version_error():
    """ResNetBlock rejects the unsupported version "abc" at construction."""
    with pytest.raises(ValueError, match="Expect version to be"):
        blocks.ResNetBlock(version="abc")


def test_efficientnet_wrong_version_error():
    """EfficientNetBlock rejects the unsupported version "abc"."""
    with pytest.raises(ValueError, match="Expect version to be"):
        blocks.EfficientNetBlock(version="abc")


def test_xception_build_return_tensor():
    """A default XceptionBlock builds a single output on a 32x32x2 input."""
    xception = blocks.XceptionBlock()
    hp = keras_tuner.HyperParameters()
    two_channel_input = keras.Input(shape=(32, 32, 2), dtype="float32")

    built = xception.build(hp, two_channel_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_xception_pretrained_build_return_tensor():
    """A pretrained XceptionBlock builds a single output on a 32x32x3 input."""
    xception = blocks.XceptionBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = xception.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_xception_pretrained_with_one_channel_input():
    """A pretrained XceptionBlock accepts a single-channel 224x224 input."""
    xception = blocks.XceptionBlock(pretrained=True)
    hp = keras_tuner.HyperParameters()
    grayscale_input = keras.Input(shape=(224, 224, 1), dtype="float32")

    built = xception.build(hp, grayscale_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_xception_pretrained_error_with_two_channels():
    """A pretrained XceptionBlock refuses to build on a 2-channel input."""
    xception = blocks.XceptionBlock(pretrained=True)
    expected_message = "When pretrained is set to True"

    with pytest.raises(ValueError, match=expected_message):
        xception.build(
            keras_tuner.HyperParameters(),
            keras.Input(shape=(224, 224, 2), dtype="float32"),
        )


def test_xception_deserialize_to_xception():
    """Serializing then deserializing an XceptionBlock keeps its type."""
    serialized = blocks.serialize(blocks.XceptionBlock())

    restored = blocks.deserialize(serialized)

    assert isinstance(restored, blocks.XceptionBlock)


def test_xception_get_config_has_all_attributes():
    """Names from get_func_args(XceptionBlock.__init__) are config keys."""
    config_keys = blocks.XceptionBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.XceptionBlock.__init__)

    assert init_args.issubset(config_keys)


def test_conv_build_return_tensor():
    """A default ConvBlock builds a single output on a 32x32x3 input."""
    conv_block = blocks.ConvBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    built = conv_block.build(hp, image_input)

    flat_outputs = tree.flatten(built)
    assert len(flat_outputs) == 1


def test_conv_with_small_image_size_return_tensor():
    """ConvBlock still builds to one output on a 10x10 image input."""
    conv = blocks.ConvBlock()
    hp = keras_tuner.HyperParameters()
    small_image = keras.Input(shape=(10, 10, 3), dtype="float32")

    result = conv.build(hp, small_image)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_conv_build_with_dropout_return_tensor():
    """ConvBlock with a fixed dropout of 0.5 builds to one output node."""
    conv = blocks.ConvBlock(dropout=0.5)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = conv.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_conv_deserialize_to_conv():
    """A serialize/deserialize round trip yields a ConvBlock again."""
    payload = blocks.serialize(blocks.ConvBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ConvBlock)


def test_conv_get_config_has_all_attributes():
    """get_config() has a key for each name get_func_args finds on __init__."""
    config_keys = blocks.ConvBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ConvBlock.__init__)

    assert init_args.issubset(config_keys)


def test_rnn_build_return_tensor():
    """A non-bidirectional RNNBlock builds to exactly one output node."""
    rnn = blocks.RNNBlock(bidirectional=False)
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    result = rnn.build(hp, sequence_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_rnn_input_shape_one_dim_error():
    """RNNBlock.build raises ValueError for an input declared as (32,)."""
    rnn = blocks.RNNBlock()

    with pytest.raises(ValueError) as exc_info:
        rnn.build(
            keras_tuner.HyperParameters(),
            keras.Input(shape=(32,), dtype="float32"),
        )

    message = str(exc_info.value)
    assert "Expect the input tensor of RNNBlock" in message


def test_rnn_deserialize_to_rnn():
    """A serialize/deserialize round trip yields an RNNBlock again."""
    payload = blocks.serialize(blocks.RNNBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.RNNBlock)


def test_rnn_get_config_has_all_attributes():
    """get_config() has a key for each name get_func_args finds on __init__."""
    config_keys = blocks.RNNBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.RNNBlock.__init__)

    assert init_args.issubset(config_keys)


def test_dense_build_return_tensor():
    """DenseBlock given a Choice for num_units builds to one output node."""
    units_choice = keras_tuner.engine.hyperparameters.Choice(
        "num_units", [10, 20]
    )
    dense = blocks.DenseBlock(num_units=units_choice)
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(32,), dtype="float32")

    result = dense.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_dense_build_with_dropout_return_tensor():
    """DenseBlock with a fixed dropout of 0.5 builds to one output node."""
    dense = blocks.DenseBlock(dropout=0.5)
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(32,), dtype="float32")

    result = dense.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_dense_build_with_bn_return_tensor():
    """DenseBlock with use_batchnorm=True builds to one output node."""
    dense = blocks.DenseBlock(use_batchnorm=True)
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(32,), dtype="float32")

    result = dense.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_dense_deserialize_to_dense():
    """A serialize/deserialize round trip yields a DenseBlock again."""
    payload = blocks.serialize(blocks.DenseBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.DenseBlock)


def test_dense_get_config_has_all_attributes():
    """get_config() has a key for each name get_func_args finds on __init__."""
    config_keys = blocks.DenseBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.DenseBlock.__init__)

    assert init_args.issubset(config_keys)


def test_embed_build_return_tensor():
    """A default Embedding block builds to exactly one output node."""
    embedding = blocks.Embedding()
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(32,), dtype="float32")

    result = embedding.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_embed_deserialize_to_embed():
    """A serialize/deserialize round trip yields an Embedding block again."""
    payload = blocks.serialize(blocks.Embedding())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.Embedding)


def test_embed_get_config_has_all_attributes():
    """get_config() has a key for each name get_func_args finds on __init__."""
    config_keys = blocks.Embedding().get_config().keys()

    init_args = test_utils.get_func_args(blocks.Embedding.__init__)

    assert init_args.issubset(config_keys)


================================================
FILE: autokeras/blocks/heads.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import keras
import tree
from keras import activations
from keras import layers
from keras import losses

from autokeras import adapters
from autokeras import analysers
from autokeras import hyper_preprocessors as hpps_module
from autokeras import preprocessors
from autokeras.blocks import reduction
from autokeras.engine import head as head_module
from autokeras.utils import types
from autokeras.utils import utils


@keras.utils.register_keras_serializable(package="autokeras")
class ClassificationHead(head_module.Head):
    """Head that ends a model with a Dense classification layer.

    Binary and multi-label problems get a sigmoid activation with binary
    crossentropy; multi-class problems (more than 2 classes) get softmax with
    categorical crossentropy. Accuracy is the default metric.

    Targets given to this head must be np.ndarray. They may be raw labels,
    one-hot encoded labels (more than two classes), or binary encoded labels
    (binary classification).

    Raw labels are encoded into a single column when two classes are found
    and one-hot encoded when more than two classes are found.

    # Arguments
        num_classes: Int. Defaults to None. If None, it will be inferred from
            the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. Defaults to use `binary_crossentropy` or
            `categorical_crossentropy` based on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        dropout: Float. The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        dropout: Optional[float] = None,
        **kwargs
    ):
        # num_classes and multi_label are read by infer_loss() below, so they
        # are assigned before the parent constructor is invoked.
        self.num_classes = num_classes
        self.multi_label = multi_label
        self.dropout = dropout
        metrics = ["accuracy"] if metrics is None else metrics
        loss = self.infer_loss() if loss is None else loss
        super().__init__(loss=loss, metrics=metrics, **kwargs)
        # The attributes below are filled in by config_from_analyser().
        self._encoded = None
        self._encoded_for_sigmoid = None
        self._encoded_for_softmax = None
        self._add_one_dimension = False
        self._labels = None

    def infer_loss(self):
        """Pick a loss from num_classes/multi_label, or None if unknown."""
        if not self.num_classes:
            return None
        use_binary = self.num_classes == 2 or self.multi_label
        if use_binary:
            return losses.BinaryCrossentropy()
        return losses.CategoricalCrossentropy()

    def get_config(self):
        """Return the parent config extended with this head's arguments."""
        config = super().get_config()
        own_args = {
            "num_classes": self.num_classes,
            "multi_label": self.multi_label,
            "dropout": self.dropout,
        }
        config.update(own_args)
        return config

    def build(self, hp, inputs=None):
        """Build reduction, optional dropout, Dense and the final activation."""
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        output_node = nodes[0]

        # Reduce the tensor to a vector.
        if len(output_node.shape) > 2:
            output_node = reduction.SpatialReduction().build(hp, output_node)

        # A user-specified rate wins; otherwise the rate is a hyperparameter.
        dropout = self.dropout
        if dropout is None:
            dropout = hp.Choice("dropout", [0.0, 0.25, 0.5], default=0)
        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)

        output_node = layers.Dense(self.shape[-1])(output_node)

        # The last layer carries the head's name.
        if isinstance(self.loss, keras.losses.BinaryCrossentropy):
            final_layer = layers.Activation(activations.sigmoid, name=self.name)
        else:
            final_layer = layers.Softmax(name=self.name)
        return final_layer(output_node)

    def get_adapter(self):
        return adapters.ClassificationAdapter(name=self.name)

    def get_analyser(self):
        return analysers.ClassificationAnalyser(
            name=self.name, multi_label=self.multi_label
        )

    def config_from_analyser(self, analyser):
        """Copy what the analyser found about the targets onto this head."""
        super().config_from_analyser(analyser)
        self.num_classes = analyser.num_classes
        # The loss is re-inferred now that num_classes has been updated.
        self.loss = self.infer_loss()
        self._encoded = analyser.encoded
        self._encoded_for_sigmoid = analyser.encoded_for_sigmoid
        self._encoded_for_softmax = analyser.encoded_for_softmax
        self._add_one_dimension = len(analyser.shape) == 1
        self._labels = analyser.labels

    def get_hyper_preprocessors(self):
        """Return the target preprocessors wrapped as hyper preprocessors."""
        steps = []

        if self._add_one_dimension:
            steps.append(preprocessors.AddOneDimension())

        if self.dtype in ("uint8", "uint16", "uint32", "uint64"):
            steps.append(preprocessors.CastToInt32())

        if not self._encoded and self.dtype != "string":
            steps.append(preprocessors.CastToString())

        # Exactly one of the following encoders/postprocessors is appended.
        if self._encoded_for_sigmoid:
            steps.append(preprocessors.SigmoidPostprocessor())
        elif self._encoded_for_softmax:
            steps.append(preprocessors.SoftmaxPostprocessor())
        elif self.num_classes == 2:
            steps.append(preprocessors.LabelEncoder(self._labels))
        else:
            steps.append(preprocessors.OneHotEncoder(self._labels))

        return [hpps_module.DefaultHyperPreprocessor(step) for step in steps]


@keras.utils.register_keras_serializable(package="autokeras")
class RegressionHead(head_module.Head):
    """Regression Dense layers.

    The targets passing to the head would have to be np.ndarray. It can be
    single-column or multi-column. The values should all be numerical.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use `mean_squared_error`.
        metrics: A list of Keras metrics. Defaults to use `mean_squared_error`.
        dropout: Float. The dropout rate for the layers.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        dropout: Optional[float] = None,
        **kwargs
    ):
        if metrics is None:
            metrics = ["mean_squared_error"]
        super().__init__(loss=loss, metrics=metrics, **kwargs)
        self.output_dim = output_dim
        self.dropout = dropout
        # Overwritten by config_from_analyser(). Initialised here so that
        # get_hyper_preprocessors() works on a head that has not been
        # configured from an analyser yet.
        self._add_one_dimension = False

    def get_config(self):
        """Return the parent config extended with output_dim and dropout."""
        config = super().get_config()
        config.update({"output_dim": self.output_dim, "dropout": self.dropout})
        return config

    def build(self, hp, inputs=None):
        """Build optional dropout, flattening and the final Dense layer."""
        inputs = tree.flatten(inputs)
        utils.validate_num_inputs(inputs, 1)
        input_node = inputs[0]
        output_node = input_node

        # A user-specified rate wins; otherwise the rate is a hyperparameter.
        if self.dropout is not None:
            dropout = self.dropout
        else:
            dropout = hp.Choice("dropout", [0.0, 0.25, 0.5], default=0)

        if dropout > 0:
            output_node = layers.Dropout(dropout)(output_node)
        output_node = reduction.Flatten().build(hp, output_node)
        # The last layer carries the head's name.
        output_node = layers.Dense(self.shape[-1], name=self.name)(output_node)
        return output_node

    def config_from_analyser(self, analyser):
        """Record whether the analysed targets have a one-entry shape."""
        super().config_from_analyser(analyser)
        self._add_one_dimension = len(analyser.shape) == 1

    def get_adapter(self):
        return adapters.RegressionAdapter(name=self.name)

    def get_analyser(self):
        return analysers.RegressionAnalyser(
            name=self.name, output_dim=self.output_dim
        )

    def get_hyper_preprocessors(self):
        """Return the target preprocessors wrapped as hyper preprocessors."""
        hyper_preprocessors = []
        if self._add_one_dimension:
            hyper_preprocessors.append(  # pragma: no cover
                hpps_module.DefaultHyperPreprocessor(
                    preprocessors.AddOneDimension()
                )
            )
        return hyper_preprocessors


================================================
FILE: autokeras/blocks/heads_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import numpy as np
import tree

from autokeras import hyper_preprocessors
from autokeras import nodes as input_module
from autokeras import preprocessors
from autokeras import test_utils
from autokeras.blocks import heads as head_module


def test_two_classes_infer_binary_crossentropy():
    """Two distinct labels lead the head to a binary crossentropy loss."""
    labels = np.array(["a", "a", "a", "b"])
    head = head_module.ClassificationHead(name="a", shape=(1,))
    adapted = head.get_adapter().adapt(labels)

    analyser = head.get_analyser()
    analyser.update(adapted)
    analyser.finalize()
    head.config_from_analyser(analyser)

    hp = keras_tuner.HyperParameters()
    input_node = input_module.Input(shape=(32,)).build_node(
        keras_tuner.HyperParameters()
    )
    head.build(hp, input_node)

    assert head.loss.name == "binary_crossentropy"


def test_three_classes_infer_categorical_crossentropy():
    """Three distinct labels lead the head to categorical crossentropy."""
    labels = np.array(["a", "a", "c", "b"])
    head = head_module.ClassificationHead(name="a", shape=(1,))
    adapted = head.get_adapter().adapt(labels)

    analyser = head.get_analyser()
    analyser.update(adapted)
    analyser.finalize()
    head.config_from_analyser(analyser)

    hp = keras_tuner.HyperParameters()
    input_node = input_module.Input(shape=(32,)).build_node(
        keras_tuner.HyperParameters()
    )
    head.build(hp, input_node)

    assert head.loss.name == "categorical_crossentropy"


def test_multi_label_loss():
    """A multi-label head ends in sigmoid and uses binary crossentropy."""
    head = head_module.ClassificationHead(
        name="a", multi_label=True, num_classes=8, shape=(8,)
    )
    model_input = keras.Input(shape=(5,))
    model_output = head.build(keras_tuner.HyperParameters(), model_input)

    model = keras.Model(model_input, model_output)
    last_layer = model.layers[-1]

    assert last_layer.activation.__name__ == "sigmoid"
    assert head.loss.name == "binary_crossentropy"


def test_clf_head_get_sigmoid_postprocessor():
    """With sigmoid-encoded targets the first step is SigmoidPostprocessor."""
    head = head_module.ClassificationHead(name="a", multi_label=True)
    head._encoded = True
    head._encoded_for_sigmoid = True

    first_step = head.get_hyper_preprocessors()[0]

    assert isinstance(
        first_step.preprocessor, preprocessors.SigmoidPostprocessor
    )


def test_clf_head_with_2_clases_get_label_encoder():
    """With two classes and raw labels the last step is a LabelEncoder."""
    head = head_module.ClassificationHead(name="a", num_classes=2)
    head._encoded = False
    head._labels = ["a", "b"]

    last_step = head.get_hyper_preprocessors()[-1]

    assert isinstance(last_step.preprocessor, preprocessors.LabelEncoder)


def test_clf_head_build_with_zero_dropout_return_tensor():
    """ClassificationHead with dropout=0 builds to exactly one output node."""
    head = head_module.ClassificationHead(dropout=0, shape=(8,))
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(5,), dtype="float32")

    result = head.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_clf_head_hpps_with_uint8_contain_cast_to_int32():
    """uint8 one-hot targets make the head include a CastToInt32 step."""
    labels = test_utils.generate_one_hot_labels(100, 10).astype("uint8")
    head = head_module.ClassificationHead(shape=(8,))

    analyser = head.get_analyser()
    for row in labels:
        analyser.update(row)
    analyser.finalize()
    head.config_from_analyser(analyser)

    def wraps_cast_to_int32(hpp):
        # True for a DefaultHyperPreprocessor holding a CastToInt32.
        return isinstance(
            hpp, hyper_preprocessors.DefaultHyperPreprocessor
        ) and isinstance(hpp.preprocessor, preprocessors.CastToInt32)

    assert any(wraps_cast_to_int32(hpp) for hpp in head.get_hyper_preprocessors())


def test_reg_head_build_with_zero_dropout_return_tensor():
    """RegressionHead with dropout=0 builds to exactly one output node."""
    head = head_module.RegressionHead(dropout=0, shape=(8,))
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(5,), dtype="float32")

    result = head.build(hp, vector_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


================================================
FILE: autokeras/blocks/preprocessing.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Blocks for data preprocessing.

They are built into keras preprocessing layers and will be part of the Keras
model.

"""
from typing import Optional
from typing import Tuple
from typing import Union

import keras
import tree
from keras import layers
from keras_tuner.engine import hyperparameters

from autokeras.engine import block as block_module
from autokeras.utils import io_utils
from autokeras.utils import utils


@keras.utils.register_keras_serializable(package="autokeras")
class Normalization(block_module.Block):
    """Perform feature-wise normalization on data.

    Refer to Normalization layer in keras preprocessing layers for more
    information.

    # Arguments
        axis: Integer or tuple of integers, the axis or axes that should be
            normalized (typically the features axis). We will normalize each
            element in the specified axis. The default is '-1' (the innermost
            axis); 0 (the batch axis) is not allowed.
    """

    def __init__(self, axis: Union[int, Tuple[int, ...]] = -1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def build(self, hp, inputs=None):
        """Apply a Keras Normalization layer to the first input node."""
        # Only the first flattened input is used; hp is not consulted.
        input_node = tree.flatten(inputs)[0]
        return layers.Normalization(axis=self.axis)(input_node)

    def get_config(self):
        """Return the parent config extended with `axis`."""
        config = super().get_config()
        config.update({"axis": self.axis})
        return config


@keras.utils.register_keras_serializable(package="autokeras")
class ImageAugmentation(block_module.Block):
    """Collection of various image augmentation methods.

    # Arguments
        translation_factor: A positive float represented as fraction value, or a
            tuple of 2 representing fraction for translation vertically and
            horizontally, or a kerastuner.engine.hyperparameters.Choice range
            of positive floats. For instance, `translation_factor=0.2` result
            in a random translation factor within 20% of the width and height.
            If left unspecified, it will be tuned automatically.
        vertical_flip: Boolean. Whether to flip the image vertically.
            If left unspecified, it will be tuned automatically.
        horizontal_flip: Boolean. Whether to flip the image horizontally.
            If left unspecified, it will be tuned automatically.
        rotation_factor: Float or kerastuner.engine.hyperparameters.Choice range
            between [0, 1]. A positive float represented as fraction of 2pi
            upper bound for rotating clockwise and counter-clockwise. When
            represented as a single float, lower = upper.
            If left unspecified, it will be tuned automatically.
        zoom_factor: A positive float represented as fraction value, or a tuple
            of 2 representing fraction for zooming vertically and horizontally,
            or a kerastuner.engine.hyperparameters.Choice range of positive
            floats.  For instance, `zoom_factor=0.2` result in a random zoom
            factor from 80% to 120%. If left unspecified, it will be tuned
            automatically.
        contrast_factor: A positive float represented as fraction of value, or a
            tuple of size 2 representing lower and upper bound, or a
            kerastuner.engine.hyperparameters.Choice range of floats to find the
            optimal value. When represented as a single float, lower = upper.
            The contrast factor will be randomly picked
            between [1.0 - lower, 1.0 + upper]. If left unspecified, it will be
            tuned automatically.
    """

    def __init__(
        self,
        translation_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        vertical_flip: Optional[bool] = None,
        horizontal_flip: Optional[bool] = None,
        rotation_factor: Optional[Union[float, hyperparameters.Choice]] = None,
        zoom_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        contrast_factor: Optional[
            Union[float, Tuple[float, float], hyperparameters.Choice]
        ] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        # Each factor goes through utils.get_hyperparameter together with a
        # Choice over [0.0, 0.1] that serves as its default search space.
        self.translation_factor = utils.get_hyperparameter(
            translation_factor,
            hyperparameters.Choice("translation_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.rotation_factor = utils.get_hyperparameter(
            rotation_factor,
            hyperparameters.Choice("rotation_factor", [0.0, 0.1]),
            float,
        )
        self.zoom_factor = utils.get_hyperparameter(
            zoom_factor,
            hyperparameters.Choice("zoom_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )
        self.contrast_factor = utils.get_hyperparameter(
            contrast_factor,
            hyperparameters.Choice("contrast_factor", [0.0, 0.1]),
            Union[float, Tuple[float, float]],
        )

    @staticmethod
    def _get_fraction_value(value):
        """Return `value` if it is a tuple, else the pair (value, value)."""
        if isinstance(value, tuple):
            return value
        return value, value

    def build(self, hp, inputs=None):
        """Chain the enabled augmentation layers onto the first input node."""
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        # Translate
        translation_factor = utils.add_to_hp(self.translation_factor, hp)
        if translation_factor not in [0, (0, 0)]:
            height_factor, width_factor = self._get_fraction_value(
                translation_factor
            )
            output_node = layers.RandomTranslation(height_factor, width_factor)(
                output_node
            )

        # Flip: an unspecified direction becomes a Boolean hyperparameter.
        horizontal_flip = self.horizontal_flip
        if horizontal_flip is None:
            horizontal_flip = hp.Boolean("horizontal_flip", default=True)
        vertical_flip = self.vertical_flip
        if vertical_flip is None:
            vertical_flip = hp.Boolean("vertical_flip", default=True)
        if horizontal_flip and vertical_flip:
            flip_mode = "horizontal_and_vertical"
        elif horizontal_flip:
            flip_mode = "horizontal"
        elif vertical_flip:
            flip_mode = "vertical"
        else:
            flip_mode = ""
        if flip_mode != "":
            output_node = layers.RandomFlip(mode=flip_mode)(output_node)

        # Rotate
        rotation_factor = utils.add_to_hp(self.rotation_factor, hp)
        if rotation_factor != 0:
            output_node = layers.RandomRotation(rotation_factor)(output_node)

        # Zoom
        # TODO: Add back RandomZoom when it is ready. Until then the zoom
        # factor is still passed through utils.add_to_hp, but no layer is
        # added for it.
        utils.add_to_hp(self.zoom_factor, hp)

        # Contrast
        contrast_factor = utils.add_to_hp(self.contrast_factor, hp)
        if contrast_factor not in [0, (0, 0)]:
            output_node = layers.RandomContrast(contrast_factor)(output_node)

        return output_node

    def get_config(self):
        """Return the parent config extended with the augmentation settings."""
        config = super().get_config()
        config.update(
            {
                "translation_factor": io_utils.serialize_block_arg(
                    self.translation_factor
                ),
                "horizontal_flip": self.horizontal_flip,
                "vertical_flip": self.vertical_flip,
                "rotation_factor": io_utils.serialize_block_arg(
                    self.rotation_factor
                ),
                "zoom_factor": io_utils.serialize_block_arg(self.zoom_factor),
                "contrast_factor": io_utils.serialize_block_arg(
                    self.contrast_factor
                ),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create a block from a config dict produced by get_config()."""
        # Work on a shallow copy so the caller's dict is left untouched.
        config = dict(config)
        for key in (
            "translation_factor",
            "rotation_factor",
            "zoom_factor",
            "contrast_factor",
        ):
            config[key] = io_utils.deserialize_block_arg(config[key])
        return cls(**config)


================================================
FILE: autokeras/blocks/preprocessing_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import tree
from keras_tuner.engine import hyperparameters

from autokeras import blocks
from autokeras import test_utils


def test_augment_build_return_tensor():
    """ImageAugmentation with rotation_factor=0.2 builds to one output."""
    augmentation = blocks.ImageAugmentation(rotation_factor=0.2)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_build_with_translation_factor_range_return_tensor():
    """A tuple translation_factor of (0, 0.1) builds to one output node."""
    augmentation = blocks.ImageAugmentation(translation_factor=(0, 0.1))
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_build_with_no_flip_return_tensor():
    """With both flips disabled the block still builds to one output."""
    augmentation = blocks.ImageAugmentation(
        vertical_flip=False, horizontal_flip=False
    )
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_build_with_vflip_only_return_tensor():
    """With only the vertical flip enabled the block builds to one output."""
    augmentation = blocks.ImageAugmentation(
        vertical_flip=True, horizontal_flip=False
    )
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_build_with_zoom_factor_return_tensor():
    """ImageAugmentation with zoom_factor=0.1 builds to one output node."""
    augmentation = blocks.ImageAugmentation(zoom_factor=0.1)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_build_with_contrast_factor_return_tensor():
    """ImageAugmentation with contrast_factor=0.1 builds to one output."""
    augmentation = blocks.ImageAugmentation(contrast_factor=0.1)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = augmentation.build(hp, image_input)

    flat_outputs = tree.flatten(result)
    assert len(flat_outputs) == 1


def test_augment_deserialize_to_augment():
    """A round trip keeps a float zoom factor and a Float contrast factor."""
    original = blocks.ImageAugmentation(
        zoom_factor=0.1,
        contrast_factor=hyperparameters.Float("contrast_factor", 0.1, 0.5),
    )
    payload = blocks.serialize(original)

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ImageAugmentation)
    assert restored.zoom_factor == 0.1
    assert isinstance(restored.contrast_factor, hyperparameters.Float)


def test_augment_get_config_has_all_attributes():
    """get_config() has a key for each name get_func_args finds on __init__."""
    config_keys = blocks.ImageAugmentation().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ImageAugmentation.__init__)

    assert init_args.issubset(config_keys)


================================================
FILE: autokeras/blocks/reduction.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import keras
import tree
from keras import layers
from keras import ops

from autokeras.engine import block as block_module
from autokeras.utils import layer_utils
from autokeras.utils import utils

REDUCTION_TYPE = "reduction_type"
FLATTEN = "flatten"
GLOBAL_MAX = "global_max"
GLOBAL_AVG = "global_avg"


def shape_compatible(shape1, shape2):
    """Tell whether two shapes have equal length and differ at most in the
    last entry."""
    # TODO: If they can be the same after passing through any layer,
    #  they are compatible. e.g. (32, 32, 3), (16, 16, 2) are compatible
    same_rank = len(shape1) == len(shape2)
    return same_rank and shape1[:-1] == shape2[:-1]


@keras.utils.register_keras_serializable(package="autokeras")
class Merge(block_module.Block):
    """Block that combines several input nodes into a single node.

    # Arguments
        merge_type: String. 'add' or 'concatenate'. If left unspecified, it will
            be tuned automatically.
    """

    def __init__(self, merge_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.merge_type = merge_type

    def get_config(self):
        """Return the parent config extended with `merge_type`."""
        config = super().get_config()
        config["merge_type"] = self.merge_type
        return config

    def build(self, hp, inputs=None):
        """Merge the input nodes with Add or Concatenate."""
        nodes = tree.flatten(inputs)
        # A single input is handed back as-is (still wrapped in the list).
        if len(nodes) == 1:
            return nodes

        # Flatten every node as soon as one is incompatible with the first.
        compatible = all(
            shape_compatible(node.shape, nodes[0].shape) for node in nodes
        )
        if not compatible:
            nodes = [Flatten().build(hp, node) for node in nodes]

        # TODO: Even inputs have different shape[-1], they can still be Add(
        #  ) after another layer. Check if the inputs are all of the same
        #  shape
        if self._inputs_same_shape(nodes):
            # The hyperparameter is only consulted when merge_type is unset.
            merge_type = self.merge_type
            if not merge_type:
                merge_type = hp.Choice(
                    "merge_type", ["add", "concatenate"], default="add"
                )
            if merge_type == "add":
                return layers.Add()(nodes)

        return layers.Concatenate()(nodes)

    def _inputs_same_shape(self, inputs):
        """Return True when every node's shape equals the first node's."""
        for node in inputs:
            if node.shape != inputs[0].shape:
                return False
        return True


@keras.utils.register_keras_serializable(package="autokeras")
class Flatten(block_module.Block):
    """Flatten the input tensor with Keras Flatten layer."""

    def build(self, hp, inputs=None):
        """Apply `layers.Flatten` when the single input has more than 2 dims.

        # Arguments
            hp: HyperParameters. Not read by this block.
            inputs: A structure holding exactly one input node.

        # Returns
            The flattened node, or the input node itself when its shape has
            at most two entries.
        """
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]
        needs_flatten = len(node.shape) > 2
        return layers.Flatten()(node) if needs_flatten else node


@keras.utils.register_keras_serializable(package="autokeras")
class Reduction(block_module.Block):
    """Base block that reduces a tensor via flatten or a global pooling.

    Subclasses provide `global_max` and `global_avg`.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it is chosen through the hyperparameters.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.reduction_type = reduction_type

    def get_config(self):
        """Return the parent config extended with the reduction type."""
        config = super().get_config()
        config[REDUCTION_TYPE] = self.reduction_type
        return config

    def global_max(self, input_node):
        """Global max reduction; must be provided by subclasses."""
        raise NotImplementedError

    def global_avg(self, input_node):
        """Global average reduction; must be provided by subclasses."""
        raise NotImplementedError

    def build(self, hp, inputs=None):
        """Reduce the single input node.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A structure holding exactly one input node.

        # Returns
            The input node itself when its shape has at most two entries,
            otherwise the result of the selected reduction.
        """
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)
        node = nodes[0]

        # No need to reduce.
        if len(node.shape) <= 2:
            return node

        if self.reduction_type is None:
            # Let the tuner pick, and scope nested hyperparameters to the pick.
            chosen = hp.Choice(
                REDUCTION_TYPE, [FLATTEN, GLOBAL_MAX, GLOBAL_AVG]
            )
            with hp.conditional_scope(REDUCTION_TYPE, [chosen]):
                return self._build_block(hp, node, chosen)

        return self._build_block(hp, node, self.reduction_type)

    def _build_block(self, hp, output_node, reduction_type):
        """Dispatch on `reduction_type`; other values return the node as is."""
        if reduction_type == FLATTEN:
            return Flatten().build(hp, output_node)
        if reduction_type == GLOBAL_MAX:
            return self.global_max(output_node)
        if reduction_type == GLOBAL_AVG:
            return self.global_avg(output_node)
        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class SpatialReduction(Reduction):
    """Reduce the dimension of a spatial tensor, e.g. image, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        """Apply the global max pooling layer selected from the node shape."""
        pooling_cls = layer_utils.get_global_max_pooling(input_node.shape)
        return pooling_cls()(input_node)

    def global_avg(self, input_node):
        """Apply the global average pooling layer selected from the shape."""
        pooling_cls = layer_utils.get_global_average_pooling(input_node.shape)
        return pooling_cls()(input_node)


@keras.utils.register_keras_serializable(package="autokeras")
class TemporalReduction(Reduction):
    """Reduce the dim of a temporal tensor, e.g. output of RNN, to a vector.

    # Arguments
        reduction_type: String. 'flatten', 'global_max' or 'global_avg'. If left
            unspecified, it will be tuned automatically.
    """

    def __init__(self, reduction_type: Optional[str] = None, **kwargs):
        super().__init__(reduction_type, **kwargs)

    def global_max(self, input_node):
        """Take the maximum over the second-to-last axis."""
        # NOTE(review): axis -2 is presumably the time-step axis - confirm.
        reduced = ops.max(input_node, axis=-2)
        return reduced

    def global_avg(self, input_node):
        """Take the mean over the second-to-last axis."""
        # NOTE(review): axis -2 is presumably the time-step axis - confirm.
        reduced = ops.mean(input_node, axis=-2)
        return reduced


================================================
FILE: autokeras/blocks/reduction_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import tree

from autokeras import blocks
from autokeras import test_utils


def test_merge_build_return_tensor():
    """Merging a (32,) and a (4, 8) input yields exactly one output."""
    merge = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    vector_input = keras.Input(shape=(32,), dtype="float32")
    matrix_input = keras.Input(shape=(4, 8), dtype="float32")

    merged = merge.build(hp, [vector_input, matrix_input])

    assert len(tree.flatten(merged)) == 1


def test_merge_single_input_return_tensor():
    """Merge given a single input yields exactly one output."""
    merge = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    only_input = keras.Input(shape=(32,), dtype="float32")

    merged = merge.build(hp, only_input)

    assert len(tree.flatten(merged)) == 1


def test_merge_inputs_with_same_shape_return_tensor():
    """Merging two (32,) inputs yields exactly one output."""
    merge = blocks.Merge()
    hp = keras_tuner.HyperParameters()
    first_input = keras.Input(shape=(32,), dtype="float32")
    second_input = keras.Input(shape=(32,), dtype="float32")

    merged = merge.build(hp, [first_input, second_input])

    assert len(tree.flatten(merged)) == 1


def test_merge_deserialize_to_merge():
    """A serialized Merge block deserializes to a Merge instance."""
    payload = blocks.serialize(blocks.Merge())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.Merge)


def test_merge_get_config_has_all_attributes():
    """The names reported for Merge.__init__ are all config keys."""
    config_keys = blocks.Merge().get_config().keys()

    init_args = test_utils.get_func_args(blocks.Merge.__init__)

    assert init_args.issubset(config_keys)


def test_temporal_build_return_tensor():
    """TemporalReduction with a tuned type yields exactly one output."""
    reduction = blocks.TemporalReduction()
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    reduced = reduction.build(hp, sequence_input)

    assert len(tree.flatten(reduced)) == 1


def test_temporal_global_max_return_tensor():
    """TemporalReduction with 'global_max' yields exactly one output."""
    reduction = blocks.TemporalReduction(reduction_type="global_max")
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    reduced = reduction.build(hp, sequence_input)

    assert len(tree.flatten(reduced)) == 1


def test_temporal_global_avg_return_tensor():
    """TemporalReduction with 'global_avg' yields exactly one output."""
    reduction = blocks.TemporalReduction(reduction_type="global_avg")
    hp = keras_tuner.HyperParameters()
    sequence_input = keras.Input(shape=(32, 10), dtype="float32")

    reduced = reduction.build(hp, sequence_input)

    assert len(tree.flatten(reduced)) == 1


def test_reduction_2d_tensor_return_input_node():
    """A (32,) input is handed back by TemporalReduction unchanged."""
    reduction = blocks.TemporalReduction()
    original = keras.Input(shape=(32,), dtype="float32")

    result = reduction.build(keras_tuner.HyperParameters(), original)

    assert len(tree.flatten(result)) == 1
    assert tree.flatten(result)[0] is original


def test_temporal_deserialize_to_temporal():
    """A serialized TemporalReduction deserializes to the same class."""
    payload = blocks.serialize(blocks.TemporalReduction())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.TemporalReduction)


def test_temporal_get_config_has_all_attributes():
    """The names reported for TemporalReduction.__init__ are config keys."""
    config_keys = blocks.TemporalReduction().get_config().keys()

    init_args = test_utils.get_func_args(blocks.TemporalReduction.__init__)

    assert init_args.issubset(config_keys)


def test_spatial_build_return_tensor():
    """SpatialReduction on a (32, 32, 3) input yields exactly one output."""
    reduction = blocks.SpatialReduction()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    reduced = reduction.build(hp, image_input)

    assert len(tree.flatten(reduced)) == 1


def test_spatial_deserialize_to_spatial():
    """A serialized SpatialReduction deserializes to the same class."""
    payload = blocks.serialize(blocks.SpatialReduction())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.SpatialReduction)


def test_spatial_get_config_has_all_attributes():
    """The names reported for SpatialReduction.__init__ are config keys."""
    config_keys = blocks.SpatialReduction().get_config().keys()

    init_args = test_utils.get_func_args(blocks.SpatialReduction.__init__)

    assert init_args.issubset(config_keys)


================================================
FILE: autokeras/blocks/wrapper.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import keras
import tree

from autokeras.blocks import basic
from autokeras.blocks import preprocessing
from autokeras.blocks import reduction
from autokeras.engine import block as block_module
from autokeras.utils import utils

# Config key and hyperparameter name for ImageBlock's block type.
BLOCK_TYPE = "block_type"
# Block type values that ImageBlock._build_block dispatches on.
RESNET = "resnet"
XCEPTION = "xception"
VANILLA = "vanilla"
EFFICIENT = "efficient"
# Hyperparameter names (and ImageBlock config keys) for the optional
# normalization and augmentation steps.
NORMALIZE = "normalize"
AUGMENT = "augment"
# Hyperparameter name for TextBlock's vocabulary size.
MAX_TOKENS = "max_tokens"


@keras.utils.register_keras_serializable(package="autokeras")
class ImageBlock(block_module.Block):
    """Block for image data.

    The image block chooses among ResNetBlock, XceptionBlock, ConvBlock and
    EfficientNetBlock, which is controlled by a hyperparameter, 'block_type'.

    # Arguments
        block_type: String. 'resnet', 'xception', 'vanilla' or 'efficient'.
            The type of Block to use. If unspecified, it will be tuned
            automatically.
        normalize: Boolean. Whether to channel-wise normalize the images.
            If unspecified, it will be tuned automatically.
        augment: Boolean. Whether to do image augmentation. If unspecified,
            it will be tuned automatically.
    """

    def __init__(
        self,
        block_type: Optional[str] = None,
        normalize: Optional[bool] = None,
        augment: Optional[bool] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.block_type = block_type
        self.normalize = normalize
        self.augment = augment

    def get_config(self):
        """Return the parent config extended with this block's arguments."""
        config = super().get_config()
        config.update(
            {
                BLOCK_TYPE: self.block_type,
                NORMALIZE: self.normalize,
                AUGMENT: self.augment,
            }
        )
        return config

    def _build_block(self, hp, output_node, block_type):
        """Build the sub-block selected by `block_type` on `output_node`.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            output_node: The node to feed into the selected block.
            block_type: String. One of the supported block type values.

        # Returns
            The output of the selected block.

        # Raises
            ValueError: If `block_type` is not a supported value.
        """
        if block_type == RESNET:
            return basic.ResNetBlock().build(hp, output_node)
        elif block_type == XCEPTION:
            return basic.XceptionBlock().build(hp, output_node)
        elif block_type == VANILLA:
            return basic.ConvBlock().build(hp, output_node)
        elif block_type == EFFICIENT:
            return basic.EfficientNetBlock().build(hp, output_node)
        # Fail here instead of silently returning None, which would only
        # surface later as an unrelated error.
        raise ValueError(
            "Expect block_type to be one of {supported}, "
            "but got {block_type}.".format(
                supported=[RESNET, XCEPTION, VANILLA, EFFICIENT],
                block_type=repr(block_type),
            )
        )

    def build(self, hp, inputs=None):
        """Build normalization, augmentation and the selected image block.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A structure of input node(s); only the first is used.

        # Returns
            The output node of the selected image block.
        """
        input_node = tree.flatten(inputs)[0]
        output_node = input_node

        # When unspecified, normalization is decided by a boolean
        # hyperparameter and built inside its conditional scope.
        if self.normalize is None:
            if hp.Boolean(NORMALIZE):
                with hp.conditional_scope(NORMALIZE, [True]):
                    output_node = preprocessing.Normalization().build(
                        hp, output_node
                    )
        elif self.normalize:
            output_node = preprocessing.Normalization().build(hp, output_node)

        # Same scheme for augmentation.
        if self.augment is None:
            if hp.Boolean(AUGMENT):
                with hp.conditional_scope(AUGMENT, [True]):
                    output_node = preprocessing.ImageAugmentation().build(
                        hp, output_node
                    )
        elif self.augment:
            output_node = preprocessing.ImageAugmentation().build(
                hp, output_node
            )

        if self.block_type is None:
            block_type = hp.Choice(
                BLOCK_TYPE, [RESNET, XCEPTION, VANILLA, EFFICIENT]
            )
            with hp.conditional_scope(BLOCK_TYPE, [block_type]):
                output_node = self._build_block(hp, output_node, block_type)
        else:
            output_node = self._build_block(hp, output_node, self.block_type)

        return output_node


@keras.utils.register_keras_serializable(package="autokeras")
class TextBlock(block_module.Block):
    """Block for text data.

    # Arguments
        max_tokens: Int. The maximum size of the vocabulary.
            If left unspecified, it will be tuned automatically.
    """

    def __init__(self, max_tokens=None, **kwargs):
        super().__init__(**kwargs)
        self.max_tokens = max_tokens

    def build(self, hp, inputs=None):
        """Build the text pipeline on the first input node.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A structure of input node(s); only the first is used.

        # Returns
            The output node of the final DenseBlock.
        """
        first_node = tree.flatten(inputs)[0]
        return self._build_block(hp, first_node)

    def get_config(self):
        """Return the parent config extended with `max_tokens`."""
        config = super().get_config()
        config["max_tokens"] = self.max_tokens
        return config

    def _build_block(self, hp, output_node):
        """Chain Embedding, ConvBlock, SpatialReduction and DenseBlock."""
        # Use Embedding and dense layers for tokenized text
        vocab_size = self.max_tokens
        if not vocab_size:
            vocab_size = hp.Choice(
                MAX_TOKENS, [500, 5000, 20000], default=5000
            )
        # NOTE(review): the extra 1 presumably reserves an index for
        # padding or unknown tokens - confirm against basic.Embedding.
        embedding = basic.Embedding(max_features=vocab_size + 1)
        node = embedding.build(hp, output_node)
        node = basic.ConvBlock().build(hp, node)
        node = reduction.SpatialReduction().build(hp, node)
        return basic.DenseBlock().build(hp, node)


@keras.utils.register_keras_serializable(package="autokeras")
class GeneralBlock(block_module.Block):
    """A general neural network block when the input type is unknown.

    The block flattens its single input and passes it through a DenseBlock.

    # Arguments
        name: String.
    """

    def build(self, hp, inputs=None):
        """Flatten the single input node and feed it to a DenseBlock.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A structure holding exactly one input node.

        # Returns
            The output node of the DenseBlock.
        """
        nodes = tree.flatten(inputs)
        utils.validate_num_inputs(nodes, 1)

        flattened = reduction.Flatten().build(hp, nodes[0])
        return basic.DenseBlock().build(hp, flattened)


@keras.utils.register_keras_serializable(package="autokeras")
class StructuredDataBlock(block_module.Block):
    """Block for structured data.

    # Arguments
        normalize: Boolean. Whether to normalize the features.
            If unspecified, it will be tuned automatically.
    """

    def __init__(self, normalize: Optional[bool] = None, **kwargs):
        super().__init__(**kwargs)
        self.normalize = normalize

    def get_config(self):
        """Return the parent config extended with `normalize`."""
        config = super().get_config()
        config["normalize"] = self.normalize
        return config

    def build(self, hp, inputs=None):
        """Optionally normalize the first input, then apply a DenseBlock.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A structure of input node(s); only the first is used.

        # Returns
            The output node of the DenseBlock.
        """
        node = tree.flatten(inputs)[0]

        if self.normalize is None:
            # Unspecified: a boolean hyperparameter decides, and the
            # normalization is built inside its conditional scope.
            if hp.Boolean(NORMALIZE):
                with hp.conditional_scope(NORMALIZE, [True]):
                    node = preprocessing.Normalization().build(hp, node)
        elif self.normalize:
            node = preprocessing.Normalization().build(hp, node)

        return basic.DenseBlock().build(hp, node)


================================================
FILE: autokeras/blocks/wrapper_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import tree

from autokeras import analysers
from autokeras import blocks
from autokeras import test_utils


def test_image_build_return_tensor():
    """ImageBlock with default arguments yields exactly one output."""
    image_block = blocks.ImageBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    assert len(tree.flatten(result)) == 1


def test_general_build_return_tensor():
    """GeneralBlock on a (32, 32, 3) input yields exactly one output."""
    general_block = blocks.GeneralBlock()
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = general_block.build(hp, image_input)

    assert len(tree.flatten(result)) == 1


def test_image_block_xception_return_tensor():
    """ImageBlock with block_type 'xception' yields exactly one output."""
    image_block = blocks.ImageBlock(block_type="xception")
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    assert len(tree.flatten(result)) == 1


def test_image_block_normalize_return_tensor():
    """ImageBlock with normalize=True yields exactly one output."""
    image_block = blocks.ImageBlock(normalize=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    assert len(tree.flatten(result)) == 1


def test_image_block_augment_return_tensor():
    """ImageBlock with augment=True yields exactly one output."""
    image_block = blocks.ImageBlock(augment=True)
    hp = keras_tuner.HyperParameters()
    image_input = keras.Input(shape=(32, 32, 3), dtype="float32")

    result = image_block.build(hp, image_input)

    assert len(tree.flatten(result)) == 1


def test_image_deserialize_to_image():
    """A serialized ImageBlock deserializes to an ImageBlock instance."""
    payload = blocks.serialize(blocks.ImageBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.ImageBlock)


def test_image_get_config_has_all_attributes():
    """The names reported for ImageBlock.__init__ are all config keys."""
    config_keys = blocks.ImageBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.ImageBlock.__init__)

    assert init_args.issubset(config_keys)


def test_text_build_return_tensor():
    """TextBlock on a (1,) string input yields exactly one output."""
    text_block = blocks.TextBlock()
    hp = keras_tuner.HyperParameters()
    text_input = keras.Input(shape=(1,), dtype="string")

    result = text_block.build(hp, text_input)

    assert len(tree.flatten(result)) == 1


def test_text_deserialize_to_text():
    """A serialized TextBlock deserializes to a TextBlock instance."""
    payload = blocks.serialize(blocks.TextBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.TextBlock)


def test_text_get_config_has_all_attributes():
    """The names reported for TextBlock.__init__ are all config keys."""
    config_keys = blocks.TextBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.TextBlock.__init__)

    assert init_args.issubset(config_keys)


def test_structured_build_return_tensor():
    """StructuredDataBlock with defaults yields exactly one output."""
    structured = blocks.StructuredDataBlock()
    structured.column_names = ["0", "1"]
    structured.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured.build(hp, table_input)

    assert len(tree.flatten(result)) == 1


def test_structured_block_normalize_return_tensor():
    """StructuredDataBlock with normalize=True yields exactly one output."""
    structured = blocks.StructuredDataBlock(normalize=True)
    structured.column_names = ["0", "1"]
    structured.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured.build(hp, table_input)

    assert len(tree.flatten(result)) == 1


def test_structured_block_search_normalize_return_tensor():
    """With the scoped normalize value preset to True, one output results."""
    structured = blocks.StructuredDataBlock(name="a")
    structured.column_names = ["0", "1"]
    structured.column_types = {
        "0": analysers.NUMERICAL,
        "1": analysers.NUMERICAL,
    }
    hp = keras_tuner.HyperParameters()
    # Preset the value under the block's name prefix.
    normalize_key = "a/" + blocks.wrapper.NORMALIZE
    hp.values[normalize_key] = True
    table_input = keras.Input(shape=(2,), dtype="string")

    result = structured.build(hp, table_input)

    assert len(tree.flatten(result)) == 1


def test_structured_deserialize_to_structured():
    """A serialized StructuredDataBlock deserializes to the same class."""
    payload = blocks.serialize(blocks.StructuredDataBlock())

    restored = blocks.deserialize(payload)

    assert isinstance(restored, blocks.StructuredDataBlock)


def test_structured_get_config_has_all_attributes():
    """The names reported for StructuredDataBlock.__init__ are config keys."""
    config_keys = blocks.StructuredDataBlock().get_config().keys()

    init_args = test_utils.get_func_args(blocks.StructuredDataBlock.__init__)

    assert init_args.issubset(config_keys)


================================================
FILE: autokeras/conftest.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil

import keras
import pytest


@pytest.fixture(autouse=True)
def clear_session():
    """Autouse fixture: clear the Keras session before and after each test."""
    keras.backend.clear_session()
    yield
    # Runs after the test body, once pytest resumes the generator.
    keras.backend.clear_session()


@pytest.fixture(autouse=True)
def remove_tmp_path(tmp_path):
    """Autouse fixture: delete the `tmp_path` directory after each test."""
    yield
    # Teardown: remove the directory tree handed in as `tmp_path`.
    shutil.rmtree(tmp_path)


@pytest.fixture(autouse=True)
def disable_traceback_filtering():
    """Autouse fixture: disable Keras traceback filtering before each test."""
    keras.config.disable_traceback_filtering()
    # No teardown follows the yield; the setting is not restored here.
    yield


================================================
FILE: autokeras/engine/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/engine/adapter.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Adapter:
    """Adapt the input and output format for a Keras Model.

    Adapter is used by the input nodes and the heads of the hypermodel. It
    does some type checking for the data and converts it to a compatible
    format. This base implementation accepts any dataset and returns it
    unchanged.
    """

    def check(self, dataset):
        """Check if the dataset is valid for the input node.

        # Arguments
            dataset: numpy.ndarray. The dataset to be checked.

        # Returns
            Boolean. Whether the dataset is in compatible format. The base
            implementation always returns True.
        """
        return True

    def adapt(self, dataset):
        """Run `check` on the dataset and return the dataset.

        # Arguments
            dataset: Usually numpy.ndarray. The dataset to be converted.

        # Returns
            The dataset that was passed in. The base implementation performs
            no conversion and ignores the return value of `check`.
        """
        self.check(dataset)
        return dataset


================================================
FILE: autokeras/engine/adapter_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import adapter as adapter_module


def test_adapter_check_return_true():
    """The base Adapter.check result is truthy for a None dataset."""
    verdict = adapter_module.Adapter().check(None)

    assert verdict


================================================
FILE: autokeras/engine/analyser.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np


class Analyser:
    """Analyze the dataset for useful information.

    Analyser is used by the input nodes and the heads of the hypermodel.  It
    analyzes the dataset to get useful information, e.g., the shape of the
    data, the data type of the dataset. The information will be used by the
    input nodes and heads to construct the data pipeline and to build the Keras
    Model.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Shape is a list of integers
        self.shape = None
        self.dtype = None
        self.num_samples = 0
        self.batch_size = None

    def update(self, data):
        """Record statistics from one array of data.

        The dtype, shape and batch size are taken from the first array seen;
        the sample count accumulates over every call.

        # Arguments
            data: np.ndarray. The data to take the statistics from.
        """
        if self.dtype is None:
            # Unicode and byte strings are both reported as "string".
            is_text = any(
                np.issubdtype(data.dtype, kind) for kind in (np.str_, np.bytes_)
            )
            self.dtype = "string" if is_text else str(data.dtype)

        if self.shape is None:
            self.shape = list(data.shape)

        leading_dim = data.shape[0]
        if self.batch_size is None:
            self.batch_size = leading_dim
        self.num_samples += leading_dim

    def finalize(self):
        """Process recorded information after all updates."""
        raise NotImplementedError


================================================
FILE: autokeras/engine/analyser_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from autokeras.engine.analyser import Analyser


def test_analyser_update_unicode_string_dtype():
    """A unicode array is recorded with dtype "string"."""
    analyser = Analyser()
    words = np.array(["hello", "world"], dtype="U10")

    analyser.update(words)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("string", [2], 2, 2)


def test_analyser_update_byte_string_dtype():
    """A byte-string array is recorded with dtype "string"."""
    analyser = Analyser()
    words = np.array([b"hello", b"world"], dtype="S10")

    analyser.update(words)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("string", [2], 2, 2)


def test_analyser_update_numeric_dtype():
    """An int32 array is recorded with dtype "int32"."""
    analyser = Analyser()
    numbers = np.array([1, 2, 3], dtype=np.int32)

    analyser.update(numbers)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("int32", [3], 3, 3)


def test_analyser_update_float_dtype():
    """A float64 array is recorded with dtype "float64"."""
    analyser = Analyser()
    numbers = np.array([1.0, 2.0, 3.0], dtype=np.float64)

    analyser.update(numbers)

    recorded = (
        analyser.dtype,
        analyser.shape,
        analyser.batch_size,
        analyser.num_samples,
    )
    assert recorded == ("float64", [3], 3, 3)


def test_analyser_finalize_not_implemented():
    """The base Analyser.finalize raises NotImplementedError."""
    base_analyser = Analyser()
    finalize = base_analyser.finalize

    with pytest.raises(NotImplementedError):
        finalize()


================================================
FILE: autokeras/engine/block.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tree

from autokeras.engine import named_hypermodel
from autokeras.engine import node as node_module


class Block(named_hypermodel.NamedHyperModel):
    """The base class for different Block.

    Blocks can be connected together to build the search space for an
    AutoModel. Notably, many args in the __init__ function default to a
    tunable variable when not specified by the user.

    After the block has been called on its input node(s), `inputs` holds the
    flattened list of input nodes and `outputs` holds the list of output nodes
    created for the block. Both are None before the first call.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Both are filled in by `__call__`.
        self.inputs = None
        self.outputs = None
        # How many output nodes `__call__` creates for this block.
        self._num_output_node = 1

    def _build_wrapper(self, hp, *args, **kwargs):
        # Run the parent build inside a name scope named after this block.
        with hp.name_scope(self.name):
            return super()._build_wrapper(hp, *args, **kwargs)

    def __call__(self, inputs):
        """Functional API.

        # Arguments
            inputs: A list of input node(s) or a single input node for the
                block.

        # Returns
            list: A list of output node(s) of the Block.

        # Raises
            TypeError: If any of the (flattened) inputs is not a Node. In that
                case neither the block nor any input node is modified.
        """
        flat_inputs = tree.flatten(inputs)
        # Validate everything up front so a bad input cannot leave the graph
        # half-connected (earlier nodes pointing at a block that failed).
        for input_node in flat_inputs:
            if not isinstance(input_node, node_module.Node):
                raise TypeError(
                    "Expect the inputs to block {name} to be "
                    "a Node, but got {type}.".format(
                        name=self.name, type=type(input_node)
                    )
                )

        self.inputs = flat_inputs
        for input_node in self.inputs:
            input_node.add_out_block(self)

        self.outputs = []
        for _ in range(self._num_output_node):
            output_node = node_module.Node()
            output_node.add_in_block(self)
            self.outputs.append(output_node)
        return self.outputs

    def build(self, hp, inputs=None):
        """Build the Block into a real Keras Model.

        The subclasses should override this function and return the output node.

        # Arguments
            hp: HyperParameters. The hyperparameters for building the model.
            inputs: A list of input node(s).

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


================================================
FILE: autokeras/engine/block_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from autokeras.engine import block as block_module


def test_block_call_raise_inputs_type_error():
    """Calling a block on a non-Node input raises a descriptive TypeError."""
    not_a_node = None
    block = block_module.Block()

    with pytest.raises(TypeError) as info:
        block(not_a_node)

    message = str(info.value)
    assert "Expect the inputs to block" in message


================================================
FILE: autokeras/engine/head.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional

import keras

from autokeras.engine import io_hypermodel
from autokeras.utils import types


def serialize_metrics(metrics):
    """Serialize a list of metrics.

    String metrics are wrapped in a one-element list; every other metric is
    passed to `keras.metrics.serialize`.

    # Arguments
        metrics: An iterable of metrics (strings or metric objects).

    # Returns
        A list with one serialized entry per metric, in the same order.
    """
    return [
        (
            [metric]
            if isinstance(metric, str)
            else keras.metrics.serialize(metric)
        )
        for metric in metrics
    ]


def deserialize_metrics(metrics):
    """Reverse `serialize_metrics`.

    Entries that are lists are unwrapped to their first element; every other
    entry is passed to `keras.metrics.deserialize`.

    # Arguments
        metrics: An iterable of serialized metrics.

    # Returns
        A list with one deserialized metric per entry, in the same order.
    """
    return [
        (
            entry[0]
            if isinstance(entry, list)
            else keras.metrics.deserialize(entry)
        )
        for entry in metrics
    ]


def serialize_loss(loss):
    """Serialize a loss.

    A string loss is wrapped in a one-element list; anything else is passed to
    `keras.losses.serialize`.
    """
    if not isinstance(loss, str):
        return keras.losses.serialize(loss)
    return [loss]


def deserialize_loss(loss):
    """Reverse `serialize_loss`.

    A list is unwrapped to its first element; anything else is passed to
    `keras.losses.deserialize`.
    """
    if not isinstance(loss, list):
        return keras.losses.deserialize(loss)
    return loss[0]


class Head(io_hypermodel.IOHyperModel):
    """Base class for the heads, e.g. classification, regression.

    # Arguments
        loss: A Keras loss function. Defaults to None. If None, the loss will be
            inferred from the AutoModel.
        metrics: A list of Keras metrics. Defaults to None. If None, the metrics
            will be inferred from the AutoModel.
        **kwargs: Forwarded to the parent class constructor.
    """

    def __init__(
        self,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.loss = loss
        # Store an empty list rather than None so `metrics` is always a list
        # and each instance gets its own.
        if metrics is None:
            metrics = []
        self.metrics = metrics

    def get_config(self):
        """Return the parent config extended with serialized loss and metrics.

        # Returns
            Dictionary.
        """
        config = super().get_config()
        config.update(
            {
                "loss": serialize_loss(self.loss),
                "metrics": serialize_metrics(self.metrics),
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Build a head from a config produced by `get_config`.

        # Arguments
            config: Dict. Must contain the "loss" and "metrics" keys in their
                serialized form. The dict passed in is left unchanged.

        # Returns
            An instance of the class.
        """
        # Shallow-copy first: the entries below are replaced with their
        # deserialized values, which must not leak into the caller's dict
        # (re-using the same config would otherwise deserialize twice).
        config = dict(config)
        config["loss"] = deserialize_loss(config["loss"])
        config["metrics"] = deserialize_metrics(config["metrics"])
        return super().from_config(config)

    def build(self, hp, inputs=None):
        """Build the head; subclasses must override this.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


================================================
FILE: autokeras/engine/head_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import head as head_module


def test_init_head_none_metrics():
    """A Head created without metrics exposes `metrics` as a list."""
    head = head_module.Head()

    assert isinstance(head.metrics, list)


================================================
FILE: autokeras/engine/hyper_preprocessor.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import named_hypermodel


class HyperPreprocessor(named_hypermodel.NamedHyperModel):
    """Search space of a Preprocessor for the input data.

    Subclasses describe how a concrete Preprocessor is built from a set of
    hyperparameters.
    """

    def build(self, hp, dataset):
        """Build the input preprocessor.

        # Arguments
            hp: `HyperParameters` instance. The hyperparameters for building
                the Preprocessor.
            dataset: np.ndarray. The dataset to be preprocessed.

        # Returns
            an instance of Preprocessor.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


================================================
FILE: autokeras/engine/io_hypermodel.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import block as block_module


class IOHyperModel(block_module.Block):
    """A mixin class connecting the input nodes and heads with the adapters.

    The input nodes and the heads extend this class. The AutoModel calls its
    functions to obtain the matching analysers and adapters, and hands the
    information learned from the data back through `config_from_analyser`.

    # Arguments
        shape: Stored as `self.shape` and included in the config. Defaults to
            None.
        **kwargs: Forwarded to the parent class constructor.
    """

    def __init__(self, shape=None, **kwargs):
        super().__init__(**kwargs)
        self.shape = shape
        # The remaining attributes stay None until `config_from_analyser`
        # copies them from an analyser.
        self.data_shape = None
        self.dtype = None
        self.batch_size = None
        self.num_samples = None

    def get_analyser(self):
        """Get the corresponding Analyser.

        # Returns
            An instance of a subclass of autokeras.engine.Analyser.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_adapter(self):
        """Get the corresponding Adapter.

        # Returns
            An instance of a subclass of autokeras.engine.Adapter.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def config_from_analyser(self, analyser):
        """Load the learned information on dataset from the Analyser.

        Copies `shape` (into `data_shape`), `dtype`, `batch_size` and
        `num_samples` from the analyser onto this object.

        # Arguments
            analyser: An instance of a subclass of autokeras.engine.Analyser.
        """
        self.data_shape = analyser.shape
        self.dtype = analyser.dtype
        self.batch_size = analyser.batch_size
        self.num_samples = analyser.num_samples

    def get_hyper_preprocessors(self):
        """Construct a list of HyperPreprocessors based on learned information.

        # Returns
            A list of HyperPreprocessors for the corresponding data.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_config(self):
        """Return the parent config extended with the "shape" entry.

        # Returns
            Dictionary.
        """
        config = super().get_config()
        config["shape"] = self.shape
        return config


================================================
FILE: autokeras/engine/named_hypermodel.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner

from autokeras.engine import serializable
from autokeras.utils import utils


class NamedHyperModel(keras_tuner.HyperModel, serializable.Serializable):
    """A serializable HyperModel that is always given a name.

    # Arguments
        name: String. The name of the HyperModel. If unspecified, it will be set
            automatically with the class name.
        **kwargs: Forwarded to the parent class constructor.
    """

    def __init__(self, name: str = None, **kwargs):
        if not name:
            # Auto-name as "<ClassName>_<uid>", converted to snake case.
            class_name = self.__class__.__name__
            uid = keras.backend.get_uid(class_name)
            name = utils.to_snake_case(f"{class_name}_{uid}")
        super().__init__(name=name, **kwargs)

    def get_config(self):
        """Get the configuration of the HyperModel.

        # Returns
            A dictionary with the "name" and "tunable" entries.
        """
        return dict(name=self.name, tunable=self.tunable)


================================================
FILE: autokeras/engine/node.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Node(object):
    """The nodes in a network connecting the blocks.

    A node keeps two lists: `in_blocks`, filled through `add_in_block`, and
    `out_blocks`, filled through `add_out_block`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.in_blocks, self.out_blocks = [], []

    def add_in_block(self, hypermodel):
        """Append `hypermodel` to `in_blocks`."""
        self.in_blocks.append(hypermodel)

    def add_out_block(self, hypermodel):
        """Append `hypermodel` to `out_blocks`."""
        self.out_blocks.append(hypermodel)


================================================
FILE: autokeras/engine/node_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/engine/preprocessor.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.engine import serializable


class Preprocessor(serializable.Serializable):
    """A preprocessor for np.ndarray.

    It is fitted on a dataset (numpy.ndarray) and then used to transform it.
    """

    def fit(self, dataset):
        """Fit the preprocessor with the dataset.

        The base implementation does nothing.

        # Arguments
            dataset: an instance of `np.ndarray`.
        """
        # TODO: may need to change to a streaming way of fit to reduce the
        # number of iterations through the dataset for speed. Need to be
        # decided when we have more use cases for this fit.
        return None

    def transform(self, dataset):
        """Transform the dataset with the preprocessor.

        # Arguments
            dataset: an instance of `np.ndarray`.

        # Returns
            The transformed dataset.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def get_config(self):
        """Return an empty config dictionary."""
        return dict()


class TargetPreprocessor(Preprocessor):
    """Preprocessor for target data.

    Adds `postprocess`, applied to the output of the Keras model.
    """

    def postprocess(self, dataset):
        """Postprocess the output of the Keras model.

        # Arguments
            dataset: numpy.ndarray. The corresponding output of the model.

        # Returns
            numpy.ndarray. The postprocessed data.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


================================================
FILE: autokeras/engine/serializable.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Serializable(object):
    """Serializable from and to JSON with same mechanism as Keras Layer.

    Subclasses provide `get_config`; `from_config` rebuilds an instance by
    passing the config entries to the constructor as keyword arguments.
    """

    def get_config(self):
        """Returns the current config of this object.

        # Returns
            Dictionary.

        # Raises
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    @classmethod
    def from_config(cls, config):
        """Build an instance from the config of this object.

        # Arguments
            config: Dict. The config of the object.

        # Returns
            An instance of the class, created as `cls(**config)`.
        """
        instance = cls(**config)
        return instance


================================================
FILE: autokeras/engine/tuner.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import copy
import os

import keras
import keras_tuner
import numpy as np
import tree
from keras import callbacks as callbacks_module

from autokeras import keras_layers
from autokeras import pipeline as pipeline_module
from autokeras.utils import data_utils
from autokeras.utils import utils


class AutoTuner(keras_tuner.engine.tuner.Tuner):
    """A Tuner class based on KerasTuner for AutoKeras.

    Different from KerasTuner's Tuner class. AutoTuner's not only tunes the
    Hypermodel which can be directly built into a Keras model, but also the
    preprocessors. Therefore, a HyperGraph stores the overall search space
    containing both the Preprocessors and Hypermodel. For every trial, the
    HyperGraph builds the PreprocessGraph and KerasGraph with the provided
    HyperParameters.

    The AutoTuner uses EarlyStopping for acceleration during the search and
    fully trains the model with full epochs and with both training and
    validation data.  The fully trained model is the best model to be used by
    AutoModel.

    # Arguments
        oracle: keras_tuner Oracle.
        hypermodel: keras_tuner HyperModel.
        **kwargs: The args supported by KerasTuner.
    """

    def __init__(self, oracle, hypermodel, **kwargs):
        """Set up the tuner and save the hypermodel to the project directory.

        # Arguments
            oracle: keras_tuner Oracle.
            hypermodel: keras_tuner HyperModel.
            **kwargs: The args supported by KerasTuner.
        """
        # Initialize before super() for reload to work.
        self._finished = False
        super().__init__(oracle, hypermodel, **kwargs)
        # Save the HyperModel under "<project_dir>/graph".
        graph_path = os.path.join(self.project_dir, "graph")
        self.hypermodel.save(graph_path)
        self.hyper_pipeline = None

    def _populate_initial_space(self):
        """Do nothing.

        Overrides the parent function to prevent building the model during
        initialization.
        """
        return None

    def get_best_model(self):
        """Return the first model returned by `get_best_models()`."""
        best_models = self.get_best_models()
        return best_models[0]

    def get_best_pipeline(self):
        """Load and return the pipeline stored at `best_pipeline_path`."""
        saved_path = self.best_pipeline_path
        return pipeline_module.load_pipeline(saved_path)

    def _pipeline_path(self, trial_id):
        """Return the "pipeline" path inside the directory of a trial."""
        trial_dir = self.get_trial_dir(trial_id)
        return os.path.join(trial_dir, "pipeline")

    def _prepare_model_build(self, hp, **kwargs):
        """Prepare for building the Keras model.

        It builds the Pipeline from HyperPipeline, transforms the dataset to set
        the input shapes and output shapes of the HyperModel.

        # Arguments
            hp: HyperParameters used to build the pipeline.
            **kwargs: Must contain "x" and "y"; may contain "validation_data".

        # Returns
            A tuple `(pipeline, (x, y), validation_data)` holding the fitted
            pipeline, the transformed training data and the transformed
            validation data (None if no "validation_data" was given).
        """
        train_data = (kwargs["x"], kwargs["y"])
        pipeline = self.hyper_pipeline.build(hp, train_data)
        pipeline.fit(train_data)
        x, y = pipeline.transform(train_data)
        self.hypermodel.set_io_shapes(data_utils.dataset_shape((x, y)))

        validation_data = None
        if "validation_data" in kwargs:
            validation_data = pipeline.transform(kwargs["validation_data"])
        return pipeline, (x, y), validation_data

    def _build_and_fit_model(self, trial, *args, **kwargs):
        """Build, adapt and fit the model of one trial.

        The pipeline built for the trial is saved under the trial's pipeline
        path, and the data in `kwargs` is replaced by its transformed version
        before fitting.

        # Arguments
            trial: The trial whose hyperparameters are used.
            *args: Unused.
            **kwargs: Fit arguments; see `_prepare_model_build` for the keys
                that are read.

        # Returns
            The history returned by `utils.fit_with_adaptive_batch_size`.
        """
        hp = trial.hyperparameters
        model = self._try_build(hp)

        pipeline, (x, y), validation_data = self._prepare_model_build(
            hp, **kwargs
        )
        kwargs["x"] = x
        kwargs["y"] = y
        kwargs["validation_data"] = validation_data

        pipeline.save(self._pipeline_path(trial.trial_id))
        # NOTE(review): presumably run so the model is traced on the
        # transformed inputs before adapting; confirm.
        keras.src.backend.compute_output_spec(model, kwargs["x"])

        self.adapt(model, kwargs["x"])

        _, history = utils.fit_with_adaptive_batch_size(model, **kwargs)
        return history

    @staticmethod
    def adapt(model, dataset):
        """Adapt the preprocessing layers in the model.

        Starting from each model input, layers that consume that tensor and
        are adaptable (see `get_output_layers`) are adapted on the matching
        data, then called on it; the result is used to adapt the adaptable
        layers that consume their output, and so on.

        # Arguments
            model: The Keras model whose layers are adapted.
            dataset: The input data. It is flattened, and entry `i` is paired
                with the `i`-th flattened model input.

        # Returns
            The same `model` object that was passed in.
        """
        # Currently, only support using the original dataset to adapt all the
        # preprocessing layers before the first non-preprocessing layer.
        # TODO: Use PreprocessingStage for preprocessing layers adapt.
        # TODO: Use Keras Tuner for preprocessing layers adapt.
        x = tree.flatten(dataset)

        def get_output_layers(tensor):
            # Collect the adaptable layers fed by `tensor`: layers (other than
            # InputLayer) whose first flattened input is this very tensor
            # object and that are a PreprocessingLayer or expose `adapt`.
            output_layers = []
            # Only the first flattened element of `tensor` is considered.
            tensor = tree.flatten(tensor)[0]
            for layer in model.layers:
                if isinstance(layer, keras.layers.InputLayer):
                    continue
                input_node = tree.flatten(layer.input)[0]
                # Identity comparison: the layer must consume this exact tensor.
                if input_node is tensor:
                    if isinstance(
                        layer,
                        keras_layers.PreprocessingLayer,
                    ) or hasattr(layer, "adapt"):
                        output_layers.append(layer)
            return output_layers

        # FIFO queue of (layer, data to adapt the layer on) pairs.
        dq = collections.deque()

        # Seed the queue with the adaptable layers attached to each input.
        for index, input_node in enumerate(tree.flatten(model.input)):
            in_x = x[index]
            for layer in get_output_layers(input_node):
                dq.append((layer, in_x))

        while len(dq):
            layer, in_x = dq.popleft()
            layer.adapt(in_x)
            # The layer's output becomes the data for the layers after it.
            out_x = layer(in_x)
            for next_layer in get_output_layers(layer.output):
                dq.append((next_layer, out_x))

        return model

    def search(
        self,
        epochs=None,
        callbacks=None,
        validation_split=0,
        verbose=1,
        **fit_kwargs
    ):
        """Search for the best HyperParameters.

        If there is not early-stopping in the callbacks, the early-stopping
        callback is injected to accelerate the search process. At the end of the
        search, the best model will be fully trained with the specified number
        of epochs.

        Does nothing and returns None if a previous search already finished.

        # Arguments
            epochs: Int. The number of epochs for each trial and for the final
                fit. Defaults to None, in which case trials run for at most
                1000 epochs with early stopping, and a final fit (if any) uses
                the epoch count of the best trial.
            callbacks: A list of callback functions. Defaults to None. The
                list passed in is not modified.
            validation_split: Float. Forwarded to the hypermodel's fit
                arguments. When greater than 0, the data in
                `fit_kwargs["validation_data"]` is concatenated to the
                training data for the final fit.
            verbose: Forwarded to the parent `search` and to the final fit.
            **fit_kwargs: Remaining fit arguments, e.g. `x`, `y` and
                `validation_data`.

        # Returns
            The history of the final fit, or None when no final fit is run.
        """
        if self._finished:
            return

        # Work on a private list: an early-stopping callback may be appended
        # below, and that must not leak into the list owned by the caller.
        callbacks = [] if callbacks is None else list(callbacks)

        self.hypermodel.set_fit_args(validation_split, epochs=epochs)

        # Insert early-stopping for adaptive number of epochs.
        epochs_provided = True
        if epochs is None:
            epochs_provided = False
            epochs = 1000
            if not utils.contain_instance(
                callbacks, callbacks_module.EarlyStopping
            ):
                callbacks.append(
                    callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
                )

        # Insert early-stopping for acceleration.
        early_stopping_inserted = False
        new_callbacks = self._deepcopy_callbacks(callbacks)
        if not utils.contain_instance(
            callbacks, callbacks_module.EarlyStopping
        ):
            early_stopping_inserted = True
            new_callbacks.append(
                callbacks_module.EarlyStopping(patience=10, min_delta=1e-4)
            )

        # Populate initial search space.
        hp = self.oracle.get_space()
        self._prepare_model_build(hp, **fit_kwargs)
        self._try_build(hp)
        self.oracle.update_space(hp)
        super().search(
            epochs=epochs,
            callbacks=new_callbacks,
            verbose=verbose,
            **fit_kwargs
        )

        # Run a final fit when validation data was split off (so it can be
        # merged back into the training data) or when early stopping was
        # inserted for acceleration (so the best model is trained with
        # enough epochs).
        if validation_split > 0 or early_stopping_inserted:
            copied_fit_kwargs = copy.copy(fit_kwargs)

            # Remove early-stopping since no validation data.
            # Remove early-stopping since it is inserted.
            copied_fit_kwargs["callbacks"] = self._remove_early_stopping(
                callbacks
            )

            # Decide the number of epochs.
            copied_fit_kwargs["epochs"] = epochs
            if not epochs_provided:
                copied_fit_kwargs["epochs"] = self._get_best_trial_epochs()

            # Concatenate training and validation data.
            if validation_split > 0:
                x, y = copied_fit_kwargs["x"], copied_fit_kwargs["y"]
                x_val, y_val = fit_kwargs["validation_data"]
                copied_fit_kwargs["x"] = tree.map_structure(
                    lambda train, val: np.concatenate([train, val], axis=0),
                    x,
                    x_val,
                )
                copied_fit_kwargs["y"] = tree.map_structure(
                    lambda train, val: np.concatenate([train, val], axis=0),
                    y,
                    y_val,
                )
                copied_fit_kwargs.pop("validation_data")

            self.hypermodel.set_fit_args(0, epochs=copied_fit_kwargs["epochs"])
            copied_fit_kwargs["verbose"] = verbose
            pipeline, model, history = self.final_fit(**copied_fit_kwargs)
        else:
            # TODO: Add return history functionality in Keras Tuner
            model = self.get_best_model()
            history = None
            pipeline = pipeline_module.load_pipeline(
                self._pipeline_path(self.oracle.get_best_trials(1)[0].trial_id)
            )

        model.save(self.best_model_path)
        pipeline.save(self.best_pipeline_path)
        self._finished = True
        return history

    def get_state(self):
        """Return the parent state extended with the "finished" flag."""
        state = super().get_state()
        state["finished"] = self._finished
        return state

    def set_state(self, state):
        """Restore the parent state and the "finished" flag from `state`.

        The flag becomes None when `state` has no "finished" entry.
        """
        super().set_state(state)
        finished = state.get("finished")
        self._finished = finished

    @staticmethod
    def _remove_early_stopping(callbacks):
        """Return deep copies of the callbacks, leaving out EarlyStopping.

        # Arguments
            callbacks: An iterable of callbacks. It is not modified.

        # Returns
            A flat list holding a deep copy of every callback that is not an
            EarlyStopping instance, in the original order.
        """
        # Copy each callback individually; copying the whole `callbacks`
        # list here would produce a list of lists.
        return [
            copy.deepcopy(callback)
            for callback in callbacks
            if not isinstance(callback, callbacks_module.EarlyStopping)
        ]

    def _get_best_trial_epochs(self):
        """Return the number of epochs of the best trial's best step."""
        best_trial_id = self.oracle.get_best_trials(1)[0].trial_id
        trial = self.oracle.get_trial(best_trial_id)
        # steps counts from 0, so epochs = step + 1.
        return trial.best_step + 1

    def _build_best_model(self):
        """Build a model from the hyperparameters of the best trial."""
        best_hp = self.oracle.get_best_trials(1)[0].hyperparameters
        return self._try_build(best_hp)

    def final_fit(self, **kwargs):
        """Build the best model and fit it on the given data.

        # Arguments
            **kwargs: Fit arguments; see `_prepare_model_build` for the keys
                that are read.

        # Returns
            A tuple `(pipeline, model, history)`.
        """
        best_hp = self.oracle.get_best_trials(1)[0].hyperparameters
        pipeline, (x, y), validation_data = self._prepare_model_build(
            best_hp, **kwargs
        )
        # Fit on the transformed data rather than on the raw inputs.
        kwargs["x"] = x
        kwargs["y"] = y
        kwargs["validation_data"] = validation_data

        model = self._build_best_model()
        self.adapt(model, kwargs["x"])
        model, history = utils.fit_with_adaptive_batch_size(model, **kwargs)
        return pipeline, model, history

    @property
    def best_model_path(self):
        """Path of "best_model.keras" inside the project directory."""
        filename = "best_model.keras"
        return os.path.join(self.project_dir, filename)

    @property
    def best_pipeline_path(self):
        """Path of "best_pipeline" inside the project directory."""
        filename = "best_pipeline"
        return os.path.join(self.project_dir, filename)

    @property
    def objective(self):
        """The `objective` of the oracle."""
        oracle = self.oracle
        return oracle.objective

    @property
    def max_trials(self):
        """The `max_trials` of the oracle."""
        oracle = self.oracle
        return oracle.max_trials


================================================
FILE: autokeras/engine/tuner_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import keras
import numpy as np
import tree

import autokeras as ak
from autokeras import test_utils
from autokeras.tuners import greedy


def called_with_early_stopping(func):
    """Tell whether the first call of a mock received an EarlyStopping.

    # Arguments
        func: A mock that was called at least once with a `callbacks` keyword
            argument.

    # Returns
        True if any of those callbacks is a `keras.callbacks.EarlyStopping`.
    """
    first_call = func.call_args_list[0]
    # Index 1 of a recorded call holds its keyword arguments.
    for callback in first_call[1]["callbacks"]:
        if isinstance(callback, keras.callbacks.EarlyStopping):
            return True
    return False


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_final_fit_with_specified_epochs(_, final_fit, super_search, tmp_path):
    """The final fit receives the number of epochs given to `search`."""
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)

    tuner.search(x=None, epochs=10, validation_data=None)

    final_fit_kwargs = final_fit.call_args_list[0][1]
    assert final_fit_kwargs["epochs"] == 10


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_tuner_call_super_with_early_stopping(
    _, final_fit, super_search, tmp_path
):
    """The parent `search` is called with an EarlyStopping callback."""
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)

    tuner.search(x=None, epochs=10, validation_data=None)

    assert called_with_early_stopping(super_search)


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner.get_best_models",
    return_value=[mock.Mock()],
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
@mock.patch("autokeras.pipeline.load_pipeline")
@mock.patch("keras_tuner.Oracle.get_best_trials", return_value=[mock.Mock()])
def test_no_final_fit_without_epochs_and_fov(
    _, _1, _2, get_best_models, final_fit, super_search, tmp_path
):
    """No final fit happens when `epochs` is None and nothing is split off."""
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)

    tuner.search(x=None, epochs=None, validation_data=None)

    final_fit.assert_not_called()


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner._get_best_trial_epochs", return_value=2
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_final_fit_best_epochs_if_epoch_unspecified(
    _, best_epochs, final_fit, super_search, tmp_path
):
    """Without `epochs`, the final fit uses the best trial's epoch count."""
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    x_train = np.random.rand(100, 32, 32, 3)
    y_train = np.random.rand(100, 1)
    validation_data = (np.random.rand(20, 32, 32, 3), np.random.rand(20, 1))

    tuner.search(
        x=x_train,
        y=y_train,
        epochs=None,
        validation_split=0.2,
        validation_data=validation_data,
    )

    final_fit_kwargs = final_fit.call_args_list[0][1]
    assert final_fit_kwargs["epochs"] == 2


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch(
    "autokeras.engine.tuner.AutoTuner._get_best_trial_epochs", return_value=2
)
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_super_with_1k_epochs_if_epoch_unspecified(
    _, best_epochs, final_fit, super_search, tmp_path
):
    """With epochs=None, the base search gets 1000 epochs + early stopping."""
    searcher = greedy.Greedy(
        directory=tmp_path, hypermodel=test_utils.build_graph()
    )
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    # Arrays are drawn in the same order as the search arguments.
    train_x = np.random.rand(100, 32, 32, 3)
    train_y = np.random.rand(100, 1)
    val_x = np.random.rand(20, 32, 32, 3)
    val_y = np.random.rand(20, 1)

    # TODO: try to use x, and y instead of tuple input across the lib.
    searcher.search(
        x=train_x,
        y=train_y,
        epochs=None,
        validation_split=0.2,
        validation_data=(val_x, val_y),
    )

    first_call_kwargs = super_search.call_args_list[0].kwargs
    assert first_call_kwargs["epochs"] == 1000
    assert called_with_early_stopping(super_search)


@mock.patch("keras_tuner.engine.base_tuner.BaseTuner.search")
@mock.patch("autokeras.engine.tuner.AutoTuner.final_fit")
@mock.patch("autokeras.engine.tuner.AutoTuner._prepare_model_build")
def test_tuner_not_call_super_search_with_overwrite(
    _, final_fit, super_search, tmp_path
):
    """A second tuner on the same directory must not re-run the base search.

    The first tuner searches and saves into `tmp_path`; the call record of the
    patched base search is then cleared before a fresh tuner searches again.
    """
    final_fit.return_value = (mock.Mock(), mock.Mock(), mock.Mock())

    first_tuner = greedy.Greedy(
        directory=tmp_path, hypermodel=test_utils.build_graph()
    )
    first_tuner.search(x=None, epochs=10, validation_data=None)
    first_tuner.save()
    super_search.reset_mock()

    second_tuner = greedy.Greedy(
        directory=tmp_path, hypermodel=test_utils.build_graph()
    )
    second_tuner.search(x=None, epochs=10, validation_data=None)

    assert super_search.call_count == 0


def test_tuner_does_not_crash_with_distribution_strategy(tmp_path):
    """Building the tuner's hypermodel from the oracle's space must not raise.

    NOTE(review): no distribution strategy is configured in this test body;
    the name may predate a refactor - confirm.
    """
    graph = test_utils.build_graph()
    tuner = greedy.Greedy(hypermodel=graph, directory=tmp_path)
    search_space = tuner.oracle.hyperparameters
    tuner.hypermodel.build(search_space)


def test_adapt_with_model_with_preprocessing_layer_only():
    """`Greedy.adapt` must accept a model made of a Normalization layer only."""
    model_input = keras.Input(shape=(10,))
    normalized = keras.layers.Normalization()(model_input)
    model = keras.Model(model_input, normalized)

    # The dataset is passed as an (x, y) pair of arrays.
    features = np.random.rand(100, 10)
    targets = np.random.rand(100, 10)
    greedy.Greedy.adapt(model, (features, targets))


def test_build_block_in_blocks_with_same_name(tmp_path):
    """Every trial must record the same number of hyperparameter values.

    `Block2` builds a `Block1` inside its own `build`, so both blocks register
    hyperparameters on the same `hp` object.
    """

    class Block1(ak.Block):
        def build(self, hp, inputs):
            hp.Boolean("a")
            first_input = tree.flatten(inputs)[0]
            return keras.layers.Dense(3)(first_input)

    class Block2(ak.Block):
        def build(self, hp, inputs):
            hp.Boolean("b")
            nested = Block1()
            return nested.build(hp, inputs)

    input_node = ak.Input()
    hidden = Block2()(input_node)
    head_output = ak.RegressionHead()(hidden)
    auto_model = ak.AutoModel(
        input_node, head_output, max_trials=5, directory=tmp_path
    )
    auto_model.fit(np.random.rand(100, 5), np.random.rand(100, 1), epochs=1)

    value_counts = [
        len(trial.hyperparameters.values)
        for trial in auto_model.tuner.oracle.trials.values()
    ]
    # All counts equal the first one, i.e. at most one distinct count.
    assert len(set(value_counts)) <= 1


================================================
FILE: autokeras/graph.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import keras_tuner
import tree

from autokeras import blocks as blocks_module
from autokeras import nodes as nodes_module
from autokeras.engine import head as head_module
from autokeras.engine import serializable
from autokeras.utils import io_utils


def feature_encoding_input(block):
    """Copy column metadata from the block's first input onto the block.

    Reads `column_types` and `column_names` from `block.inputs[0]` and assigns
    them to the attributes of the same name on `block`.

    # Arguments
        block: The block to update; its first input must expose
            `column_types` and `column_names`.
    """
    source = block.inputs[0]
    for attribute in ("column_types", "column_names"):
        setattr(block, attribute, getattr(source, attribute))


# Hooks run by Graph.compile(): maps a block class to the list of functions
# called with each block instance of exactly that class (the lookup uses
# block.__class__, so subclasses are not matched).
COMPILE_FUNCTIONS = {
    blocks_module.StructuredDataBlock: [feature_encoding_input],
}


def load_graph(filepath, custom_objects=None):
    """Rebuild a Graph from a JSON config file.

    # Arguments
        filepath: Path of the JSON file holding the graph config.
        custom_objects: Optional dict of custom objects made available through
            a Keras custom object scope while the config is deserialized.
            Defaults to an empty dict.

    # Returns
        The Graph created by `Graph.from_config`.
    """
    scope_objects = {} if custom_objects is None else custom_objects
    with keras.utils.custom_object_scope(scope_objects):
        config = io_utils.load_json(filepath)
        return Graph.from_config(config)


class Graph(keras_tuner.HyperModel, serializable.Serializable):
    """A HyperModel made of connected Blocks and Heads.

    On construction the graph collects the nodes lying on a path from the
    given inputs to the given outputs, assigns each of them an integer id, and
    stores the blocks between them in topological order.

    # Arguments
        inputs: A list of input node(s) for the Graph.
        outputs: A list of output node(s) for the Graph.
    """

    def __init__(self, inputs=None, outputs=None, **kwargs):
        super().__init__(**kwargs)
        self.inputs = tree.flatten(inputs)
        self.outputs = tree.flatten(outputs)

        # Bookkeeping filled in by _build_network().
        self._node_to_id = {}
        self._nodes = []
        self.blocks = []
        self._block_to_id = {}

        # Temporary attributes, assigned later by set_fit_args().
        self.epochs = None
        self.num_samples = None

        # The network is only wired up when both ends are provided.
        if inputs and outputs:
            self._build_network()

    def _build_network(self):
        """Collect the relevant nodes and order the blocks topologically.

        # Raises
            ValueError: If an input or output node is not connected, if a
                collected block needs an input node that is not part of the
                graph, or (from the search) if the network has a cycle.
        """
        self._node_to_id = {}

        # Depth-first search from every input; nodes that lead to an output
        # get registered in self._node_to_id.
        for start_node in self.inputs:
            self._search_network(start_node, self.outputs, set(), set())
        self._nodes = sorted(self._node_to_id, key=self._node_to_id.get)

        if not all(
            node in self._node_to_id for node in self.inputs + self.outputs
        ):
            raise ValueError("Inputs and outputs not connected.")

        # Gather, in first-encounter order, each block with at least one
        # registered output node.
        pending = []
        for node in self._nodes:
            for block in node.out_blocks:
                if block in pending:
                    continue
                if any(out in self._node_to_id for out in block.outputs):
                    pending.append(block)

        # Every input of a gathered block must itself be registered.
        for block in pending:
            for node in block.inputs:
                if node in self._node_to_id:
                    continue
                raise ValueError(
                    "A required input is missing for HyperModel "
                    "{name}.".format(name=block.name)
                )

        # In degree of a node: number of gathered blocks among its in_blocks.
        in_degree = [
            sum(1 for block in node.in_blocks if block in pending)
            for node in self._nodes
        ]

        # Repeatedly emit the blocks whose inputs all have in degree 0.
        self.blocks = []
        self._block_to_id = {}
        while pending:
            ready = [
                block
                for block in pending
                if not any(
                    in_degree[self._node_to_id[node]] for node in block.inputs
                )
            ]
            for block in ready:
                pending.remove(block)
                self._add_block(block)
                # The outputs of an emitted block no longer wait on it.
                for produced in block.outputs:
                    in_degree[self._node_to_id[produced]] -= 1

    def _search_network(
        self, input_node, outputs, in_stack_nodes, visited_nodes
    ):
        """Depth-first search registering the nodes that lead to an output.

        # Arguments
            input_node: The node the search currently stands on.
            outputs: The output nodes of the graph.
            in_stack_nodes: Set of nodes on the current search path.
            visited_nodes: Set of nodes already visited.

        # Raises
            ValueError: If a block output is already on the current path.
        """
        visited_nodes.add(input_node)
        in_stack_nodes.add(input_node)

        reaches_output = input_node in outputs

        for block in input_node.out_blocks:
            for successor in block.outputs:
                if successor in in_stack_nodes:
                    raise ValueError("The network has a cycle.")
                if successor not in visited_nodes:
                    self._search_network(
                        successor, outputs, in_stack_nodes, visited_nodes
                    )
                # A registered successor means this node leads to an output.
                if successor in self._node_to_id:
                    reaches_output = True

        if reaches_output:
            self._add_node(input_node)

        in_stack_nodes.remove(input_node)

    def _add_block(self, block):
        """Append `block` and record its index unless already present."""
        if block in self.blocks:
            return
        self._block_to_id[block] = len(self.blocks)
        self.blocks.append(block)

    def _add_node(self, input_node):
        """Give `input_node` the next free id unless it already has one."""
        self._node_to_id.setdefault(input_node, len(self._node_to_id))

    def get_config(self):
        """Serialize the blocks, the input nodes and the wiring between them.

        # Returns
            A dict with the keys "blocks", "nodes", "outputs", "block_inputs"
            and "block_outputs".
        """

        def ids_of(nodes):
            return [self._node_to_id[node] for node in nodes]

        serialized_blocks = [
            blocks_module.serialize(block) for block in self.blocks
        ]
        # Only the input nodes are serialized.
        serialized_nodes = {
            str(self._node_to_id[node]): nodes_module.serialize(node)
            for node in self.inputs
        }
        inputs_by_block = {
            str(index): ids_of(block.inputs)
            for index, block in enumerate(self.blocks)
        }
        outputs_by_block = {
            str(index): ids_of(block.outputs)
            for index, block in enumerate(self.blocks)
        }
        output_ids = ids_of(self.outputs)

        return {
            "blocks": serialized_blocks,  # List of serialized blocks.
            "nodes": serialized_nodes,  # Dict {id: serialized}.
            "outputs": output_ids,  # List of node_ids.
            "block_inputs": inputs_by_block,  # Dict {id: List of node_ids}.
            "block_outputs": outputs_by_block,  # Dict {id: List of node_ids}.
        }

    @classmethod
    def from_config(cls, config):
        """Recreate a Graph from the dict produced by `get_config`.

        The blocks are deserialized and called in their stored order on the
        already-known nodes, and each call's outputs are stored under the ids
        recorded in `config["block_outputs"]`.
        """
        blocks = [
            blocks_module.deserialize(serialized)
            for serialized in config["blocks"]
        ]
        nodes = {
            int(key): nodes_module.deserialize(serialized)
            for key, serialized in config["nodes"].items()
        }

        # At this point `nodes` holds exactly the input nodes.
        inputs = list(nodes.values())
        for index, block in enumerate(blocks):
            key = str(index)
            block_inputs = [nodes[i] for i in config["block_inputs"][key]]
            produced = tree.flatten(block(block_inputs))
            nodes.update(zip(config["block_outputs"][key], produced))

        outputs = [nodes[i] for i in config["outputs"]]
        return cls(inputs=inputs, outputs=outputs)

    def compile(self):
        """Share the information between blocks."""
        for block in self.blocks:
            hooks = COMPILE_FUNCTIONS.get(block.__class__, ())
            for hook in hooks:
                hook(block)

    def build(self, hp):
        """Build the HyperModel into a Keras Model."""
        self.compile()

        built = {}  # node id -> value produced for that node
        model_inputs = []
        for node in self.inputs:
            node_id = self._node_to_id[node]
            keras_input = node.build_node(hp)
            built[node_id] = node.build(hp, keras_input)
            model_inputs.append(keras_input)

        # self.blocks is in topological order, so inputs are always ready.
        for block in self.blocks:
            block_inputs = [
                built[self._node_to_id[node]] for node in block.inputs
            ]
            produced = tree.flatten(block.build(hp, inputs=block_inputs))
            for node, value in zip(block.outputs, produced):
                built[self._node_to_id[node]] = value

        model_outputs = [
            built[self._node_to_id[node]] for node in self.outputs
        ]
        model = keras.Model(model_inputs, model_outputs)

        return self._compile_keras_model(hp, model)

    def _get_metrics(self):
        """Map each output Head's name to its metrics."""
        producers = (node.in_blocks[0] for node in self.outputs)
        return {
            block.name: block.metrics
            for block in producers
            if isinstance(block, head_module.Head)
        }

    def _get_loss(self):
        """Map each output Head's name to its loss."""
        producers = (node.in_blocks[0] for node in self.outputs)
        return {
            block.name: block.loss
            for block in producers
            if isinstance(block, head_module.Head)
        }

    def _compile_keras_model(self, hp, model):
        """Pick optimizer and learning rate from `hp` and compile `model`."""
        # Specify hyperparameters from compile(...)
        optimizer_name = hp.Choice(
            "optimizer",
            ["adam", "sgd", "adam_weight_decay"],
            default="adam",
        )
        # TODO: add adadelta optimizer when it can optimize embedding layer on
        # GPU.
        learning_rate = hp.Choice(
            "learning_rate", [1e-1, 1e-2, 1e-3, 1e-4, 2e-5, 1e-5], default=1e-3
        )

        if optimizer_name == "adam":
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == "sgd":
            optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_name == "adam_weight_decay":
            # Decay the learning rate to zero over the whole training run.
            samples_per_step = self.batch_size or 32
            steps_per_epoch = max(1, int(self.num_samples / samples_per_step))
            total_steps = steps_per_epoch * self.epochs

            schedule = keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=total_steps,
                end_learning_rate=0.0,
            )

            optimizer = keras.optimizers.AdamW(
                learning_rate=schedule,
                weight_decay=0.01,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-6,
            )

        model.compile(
            optimizer=optimizer,
            metrics=self._get_metrics(),
            loss=self._get_loss(),
        )

        return model

    def save(self, filepath):
        """Write the graph config to `filepath` as JSON."""
        config = self.get_config()
        io_utils.save_json(filepath, config)

    def set_io_shapes(self, shapes):
        """Set input node and output block shapes, dropping the first axis.

        # Arguments
            shapes: A pair; `shapes[0]` holds the input shapes and
                `shapes[1]` the output shapes.
        """
        input_shapes = tree.flatten(shapes[0])
        for node, shape in zip(self.inputs, input_shapes):
            node.shape = tuple(shape[1:])
        output_shapes = tree.flatten(shapes[1])
        for node, shape in zip(self.outputs, output_shapes):
            node.in_blocks[0].shape = tuple(shape[1:])

    def set_fit_args(self, validation_split, epochs=None):
        """Record the epoch count and the number of training samples.

        # Arguments
            validation_split: Fraction of the data held out for validation;
                a falsy value (e.g. None) is treated as 0.
            epochs: Number of epochs; 1 is used when not specified.
        """
        # Epochs not specified by the user
        self.epochs = 1 if epochs is None else epochs
        held_out = validation_split or 0
        # num_samples from analysers are before split
        self.num_samples = self.inputs[0].num_samples * (1 - held_out)

    @property
    def batch_size(self):
        """The batch size stored on the first input node."""
        first_input = self.inputs[0]
        return first_input.batch_size


================================================
FILE: autokeras/graph_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import keras_tuner
import pytest

import autokeras as ak
from autokeras import graph as graph_module


def test_input_output_disconnect():
    """A Graph whose input never reaches its output must be rejected."""
    # First chain: an input feeding a DenseBlock whose output is discarded.
    dangling_input = ak.Input()
    ak.DenseBlock()(dangling_input)

    # Second, unrelated chain that ends in a regression head.
    other_input = ak.Input()
    hidden = ak.DenseBlock()(other_input)
    head_output = ak.RegressionHead()(hidden)

    with pytest.raises(ValueError) as raised:
        graph_module.Graph(inputs=dangling_input, outputs=head_output)
    message = str(raised.value)
    assert "Inputs and outputs not connected." in message


def test_hyper_graph_cycle():
    """Pointing a head's outputs back at an earlier node must be detected."""
    first_input = ak.Input()
    second_input = ak.Input()
    first_hidden = ak.DenseBlock()(first_input)
    second_hidden = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_hidden, second_hidden])
    head = ak.RegressionHead()
    head_output = head(merged)
    # Rewire the head so that it feeds an upstream node, creating a loop.
    head.outputs = first_hidden

    with pytest.raises(ValueError) as raised:
        graph_module.Graph(
            inputs=[first_input, second_input], outputs=head_output
        )
    message = str(raised.value)
    assert "The network has a cycle." in message


def test_input_missing():
    """Omitting one of two merged inputs must raise a missing-input error."""
    first_input = ak.Input()
    second_input = ak.Input()
    first_hidden = ak.DenseBlock()(first_input)
    second_hidden = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_hidden, second_hidden])
    head_output = ak.RegressionHead()(merged)

    # Only the first input is declared although the merge needs both.
    with pytest.raises(ValueError) as raised:
        graph_module.Graph(inputs=first_input, outputs=head_output)
    message = str(raised.value)
    assert "A required input is missing for HyperModel" in message


def test_graph_basics():
    """A dense regression graph builds a model with the declared shapes."""
    source = ak.Input(shape=(30,))
    hidden = ak.DenseBlock()(source)
    head_output = ak.RegressionHead(shape=(1,))(hidden)

    graph = graph_module.Graph(inputs=source, outputs=head_output)
    model = graph.build(keras_tuner.HyperParameters())

    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, 1)


def test_adamw_optimizer():
    """The graph builds when the optimizer is forced to adam_weight_decay."""
    source = ak.Input(shape=(30,))
    hidden = ak.DenseBlock()(source)
    head_output = ak.RegressionHead(shape=(1,))(hidden)

    # Register the choice, then override its value to pick the AdamW branch.
    hp = keras_tuner.HyperParameters()
    hp.Choice("optimizer", ["adam", "sgd", "adam_weight_decay"], default="adam")
    hp.values["optimizer"] = "adam_weight_decay"

    graph = graph_module.Graph(inputs=source, outputs=head_output)
    first_input = graph.inputs[0]
    first_input.num_samples = 100
    first_input.batch_size = 32
    graph.epochs = 10
    graph.set_fit_args(0, epochs=10)

    model = graph.build(hp)
    assert model.input_shape == (None, 30)
    assert model.output_shape == (None, 1)


def test_graph_save_load(tmp_path):
    """A two-input, two-head graph survives a save / load round trip."""
    dense_input = ak.Input()
    conv_input = ak.Input()
    dense_output = ak.DenseBlock()(dense_input)
    conv_output = ak.ConvBlock()(conv_input)
    merged = ak.Merge()([dense_output, conv_output])
    regression_output = ak.RegressionHead()(merged)
    classification_output = ak.ClassificationHead()(merged)

    original = graph_module.Graph(
        inputs=[dense_input, conv_input],
        outputs=[regression_output, classification_output],
    )
    path = os.path.join(tmp_path, "graph")
    original.save(path)
    restored = graph_module.load_graph(path)

    assert len(restored.inputs) == 2
    assert len(restored.outputs) == 2
    # Each restored input must still feed the same kind of block.
    first_block = restored.inputs[0].out_blocks[0]
    second_block = restored.inputs[1].out_blocks[0]
    assert isinstance(first_block, ak.DenseBlock)
    assert isinstance(second_block, ak.ConvBlock)


def test_merge():
    """Merging two dense branches keeps both input shapes on the model."""
    first_input = ak.Input(shape=(30,))
    second_input = ak.Input(shape=(40,))
    first_hidden = ak.DenseBlock()(first_input)
    second_hidden = ak.DenseBlock()(second_input)
    merged = ak.Merge()([first_hidden, second_hidden])
    head_output = ak.RegressionHead(shape=(1,))(merged)

    graph = graph_module.Graph(
        inputs=[first_input, second_input], outputs=head_output
    )
    model = graph.build(keras_tuner.HyperParameters())

    assert model.input_shape == [(None, 30), (None, 40)]
    assert model.output_shape == (None, 1)


def test_save_custom_metrics_loss(tmp_path):
    """Custom loss and metric callables are restored via custom_objects."""

    def custom_metric(y_pred, y_true):
        return 1

    def custom_loss(y_pred, y_true):
        return y_pred - y_true

    head = ak.ClassificationHead(
        loss=custom_loss, metrics=["accuracy", custom_metric]
    )
    source = ak.Input()
    head_output = head(source)
    graph = graph_module.Graph(source, head_output)
    path = os.path.join(tmp_path, "graph")
    graph.save(path)

    custom_objects = {
        "custom_metric": custom_metric,
        "custom_loss": custom_loss,
    }
    restored = graph_module.load_graph(path, custom_objects=custom_objects)

    # The head is the only block; its second metric is the custom one.
    restored_head = restored.blocks[0]
    assert restored_head.metrics[1](0, 0) == 1
    assert restored_head.loss(3, 2) == 1


def test_graph_can_init_with_one_missing_output():
    """A Graph can be created when a downstream head is left out of outputs."""
    source = ak.ImageInput()
    features = ak.ConvBlock()(source)
    regression_output = ak.RegressionHead()(features)
    # This head is attached to the regression output but not declared below.
    ak.ClassificationHead()(regression_output)

    graph_module.Graph(source, regression_output)


def test_set_fit_args_with_none_validation_split():
    """`set_fit_args(None, ...)` keeps every sample, as a split of 0 would."""
    source = ak.Input(shape=(30,))
    hidden = ak.DenseBlock()(source)
    head_output = ak.RegressionHead(shape=(1,))(hidden)

    graph = graph_module.Graph(inputs=source, outputs=head_output)
    first_input = graph.inputs[0]
    first_input.num_samples = 100
    first_input.batch_size = 32

    graph.set_fit_args(None, epochs=1)

    assert graph.num_samples == 100  # Should handle None as 0


================================================
FILE: autokeras/hyper_preprocessors.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras

from autokeras import preprocessors
from autokeras.engine import hyper_preprocessor
from autokeras.utils import utils


def serialize(encoder):
    """Serialize `encoder` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(encoder)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild an object from `config` via `utils.deserialize_keras_object`.

    # Arguments
        config: The serialized object.
        custom_objects: Optional custom objects forwarded to the helper.
    """
    # The names defined in this module are offered as lookup candidates.
    known_objects = globals()
    restored = utils.deserialize_keras_object(
        config,
        module_objects=known_objects,
        custom_objects=custom_objects,
    )
    return restored


@keras.utils.register_keras_serializable(package="autokeras")
class DefaultHyperPreprocessor(hyper_preprocessor.HyperPreprocessor):
    """HyperPreprocessor without Hyperparameters to tune.

    It would always return the same preprocessor. No hyperparameters to be
    tuned.

    # Arguments
        preprocessor: The Preprocessor to return when calling build.
    """

    def __init__(self, preprocessor, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.preprocessor = preprocessor

    def build(self, hp, dataset):
        """Return the wrapped preprocessor; `hp` and `dataset` are unused."""
        return self.preprocessor

    def get_config(self):
        """Return the parent config plus the serialized preprocessor."""
        config = super().get_config()
        config.update(
            {"preprocessor": preprocessors.serialize(self.preprocessor)}
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Create an instance from the dict produced by `get_config`.

        The caller's dict is not modified, so the same config can be passed
        to `from_config` more than once.
        """
        # Work on a shallow copy: replacing the entry in place would leave a
        # deserialized object inside the caller's config.
        config = dict(config)
        config["preprocessor"] = preprocessors.deserialize(
            config["preprocessor"]
        )
        return super().from_config(config)


================================================
FILE: autokeras/hyper_preprocessors_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import hyper_preprocessors
from autokeras import preprocessors


def test_serialize_and_deserialize_default_hpps():
    """The wrapped preprocessor type survives a serialize round trip."""
    original = hyper_preprocessors.DefaultHyperPreprocessor(
        preprocessors.AddOneDimension()
    )

    serialized = hyper_preprocessors.serialize(original)
    restored = hyper_preprocessors.deserialize(serialized)

    assert isinstance(restored.preprocessor, preprocessors.AddOneDimension)


def test_serialize_and_deserialize_default_hpps_categorical():
    """A fitted CategoricalToNumerical still transforms after a round trip."""
    x_train = np.array([["a", "ab", 2.1], ["b", "bc", 1.0], ["a", "bc", "nan"]])
    column_types = {
        "column_a": "categorical",
        "column_b": "categorical",
        "column_c": "numerical",
    }
    encoder = preprocessors.CategoricalToNumerical(
        column_names=["column_a", "column_b", "column_c"],
        column_types=column_types,
    )

    original = hyper_preprocessors.DefaultHyperPreprocessor(encoder)
    # Fit before serializing so the restored object is used without refitting.
    original.preprocessor.fit(x_train)
    restored = hyper_preprocessors.deserialize(
        hyper_preprocessors.serialize(original)
    )
    assert isinstance(
        restored.preprocessor,
        preprocessors.CategoricalToNumerical,
    )

    encoded = restored.preprocessor.transform(x_train)

    # Column 0: rows 0 and 2 share "a", row 1 holds "b".
    assert encoded[0][0] == encoded[2][0]
    assert encoded[0][0] != encoded[1][0]
    # Column 1: row 0 holds "ab", rows 1 and 2 hold "bc".
    assert encoded[0][1] != encoded[1][1]
    assert encoded[0][1] != encoded[2][1]
    # Column 2: the "nan" entry is expected to come out as 0.
    assert encoded[2][2] == 0
    assert encoded.dtype == "float32"


================================================
FILE: autokeras/integration_tests/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/integration_tests/functional_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pandas as pd

import autokeras as ak
from autokeras import test_utils


def test_text_and_structured_data(tmp_path):
    """Fit a text + structured-data AutoModel with two heads via Hyperband."""
    # Prepare the data.
    num_instances = 3
    x_text = test_utils.generate_text_data(num_instances)

    csv_frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    x_structured_data = csv_frame.values[:num_instances]

    y_classification = test_utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    y_regression = test_utils.generate_data(
        num_instances=num_instances, shape=(1,)
    )

    # Build model and train.
    structured_data_input = ak.StructuredDataInput()
    structured_branch = ak.DenseBlock()(structured_data_input)

    text_input = ak.TextInput()
    text_branch = ak.TextBlock()(text_input)
    merged = ak.Merge()((structured_branch, text_branch))

    regression_head = ak.RegressionHead()(merged)
    classification_head = ak.ClassificationHead()(merged)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_head, classification_head],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=test_utils.SEED,
    )

    # Inputs and targets follow the order declared on the AutoModel.
    features = (x_text, x_structured_data)
    targets = (y_regression, y_classification)
    automodel.fit(
        features,
        targets,
        validation_split=0.2,
        epochs=1,
        batch_size=2,
    )


def test_image_blocks(tmp_path):
    """Fit an image AutoModel that merges a ResNet and an Xception branch."""
    num_instances = 3
    x_train = test_utils.generate_data(
        num_instances=num_instances, shape=(28, 28)
    )
    y_train = np.random.randint(0, 10, num_instances)

    image_input = ak.ImageInput()
    preprocessed = ak.Normalization()(image_input)
    preprocessed = ak.ImageAugmentation()(preprocessed)
    resnet_branch = ak.ResNetBlock(version="v2")(preprocessed)
    xception_branch = ak.XceptionBlock()(preprocessed)
    merged = ak.Merge()((resnet_branch, xception_branch))
    head_output = ak.ClassificationHead()(merged)

    automodel = ak.AutoModel(
        inputs=image_input,
        outputs=head_output,
        directory=tmp_path,
        max_trials=1,
        seed=test_utils.SEED,
    )

    # The training data doubles as validation data.
    automodel.fit(
        x_train,
        y_train,
        validation_data=(x_train, y_train),
        epochs=1,
        batch_size=2,
    )


================================================
FILE: autokeras/integration_tests/io_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd

import autokeras as ak
from autokeras import test_utils


def test_io_api(tmp_path):
    """Fit and predict with three inputs and two heads via the IO API."""
    sample_count = 3
    images = test_utils.generate_data(
        num_instances=sample_count, shape=(28, 28)
    )
    texts = test_utils.generate_text_data(num_instances=sample_count)

    images = images[:sample_count]
    csv_frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    table_rows = csv_frame.to_numpy().astype(str)[:sample_count]
    class_targets = test_utils.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3
    )
    regression_targets = test_utils.generate_data(
        num_instances=sample_count, shape=(1,)
    )

    # One input node per modality, one head per target.
    input_nodes = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    output_heads = [
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ]
    automodel = ak.AutoModel(
        inputs=input_nodes,
        outputs=output_heads,
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=test_utils.SEED,
    )

    automodel.fit(
        [images, texts, table_rows],
        [regression_targets, class_targets],
        epochs=1,
        validation_split=0.2,
        batch_size=2,
    )
    automodel.predict([images, texts, table_rows])


================================================
FILE: autokeras/integration_tests/task_api_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import numpy as np
import pandas as pd

import autokeras as ak
from autokeras import test_utils

# Base sample count used to size the datasets generated or loaded by the
# tests in this module.
NUM_INSTANCES = 3
# Batch size passed to every `fit` call in this module.
BATCH_SIZE = 2


def test_image_classifier(tmp_path):
    """ImageClassifier fits, exports, evaluates and predicts 10 classes."""
    images = test_utils.generate_data(
        num_instances=NUM_INSTANCES, shape=(32, 32)
    )
    one_hot_labels = test_utils.generate_one_hot_labels(
        num_instances=NUM_INSTANCES, num_classes=10
    )
    classifier = ak.ImageClassifier(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    classifier.fit(
        images,
        one_hot_labels,
        epochs=1,
        validation_split=0.2,
        batch_size=BATCH_SIZE,
    )
    exported = classifier.export_model()
    classifier.evaluate(images, one_hot_labels)
    predictions = classifier.predict(images)
    assert predictions.shape == (len(images), 10)
    assert isinstance(exported, keras.Model)


def test_image_regressor(tmp_path):
    """ImageRegressor fits on 3-channel images and predicts one value each."""
    images = test_utils.generate_data(
        num_instances=NUM_INSTANCES, shape=(32, 32, 3)
    )
    targets = test_utils.generate_data(num_instances=NUM_INSTANCES, shape=(1,))
    regressor = ak.ImageRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    regressor.fit(
        images,
        targets,
        epochs=1,
        validation_split=0.2,
        batch_size=BATCH_SIZE,
    )
    regressor.export_model()
    predictions = regressor.predict(images)
    assert predictions.shape == (len(images), 1)


def test_text_classifier(tmp_path):
    """TextClassifier fits on alternating 0/1 labels and predicts a column."""
    texts = test_utils.generate_text_data(num_instances=NUM_INSTANCES)
    # Alternating 0/1 labels, truncated to NUM_INSTANCES entries.
    repeats = (NUM_INSTANCES + 1) // 2
    labels = np.array([0, 1] * repeats)[:NUM_INSTANCES]
    # The training data is reused for validation and prediction.
    eval_texts = texts
    eval_labels = labels
    classifier = ak.TextClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
        metrics=["accuracy"],
        objective="accuracy",
    )
    fit_options = dict(
        epochs=1,
        validation_data=(eval_texts, eval_labels),
        batch_size=BATCH_SIZE,
    )
    classifier.fit(texts, labels, **fit_options)
    classifier.export_model()
    predictions = classifier.predict(eval_texts)
    assert predictions.shape == (len(eval_texts), 1)
    assert classifier.tuner._get_best_trial_epochs() <= 2


def test_text_regressor(tmp_path):
    """TextRegressor fits on generated text and predicts one value each."""
    texts = test_utils.generate_text_data(num_instances=NUM_INSTANCES)
    eval_texts = texts
    targets = test_utils.generate_data(num_instances=NUM_INSTANCES, shape=(1,))
    eval_targets = targets
    regressor = ak.TextRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    fit_options = dict(
        epochs=1,
        validation_data=(eval_texts, eval_targets),
        batch_size=BATCH_SIZE,
    )
    regressor.fit(texts, targets, **fit_options)
    # Predict once before exporting and once more for the shape check.
    regressor.predict(eval_texts)
    regressor.export_model()
    predictions = regressor.predict(eval_texts)
    assert predictions.shape == (len(eval_texts), 1)


def test_structured_data_regressor(tmp_path):
    """StructuredDataRegressor fits on CSV rows and predicts held-out rows."""
    total_rows = NUM_INSTANCES * 2
    train_rows = NUM_INSTANCES
    csv_frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    rows = csv_frame.to_numpy().astype(str)[:total_rows]
    # First half is used for training, second half for prediction.
    x_train, x_test = rows[:train_rows], rows[train_rows:]
    targets = test_utils.generate_data(num_instances=total_rows, shape=())
    y_train, y_test = targets[:train_rows], targets[train_rows:]
    regressor = ak.StructuredDataRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    fit_options = dict(
        epochs=11,
        validation_data=(x_train, y_train),
        batch_size=BATCH_SIZE,
    )
    regressor.fit(x_train, y_train, **fit_options)
    regressor.export_model()
    predictions = regressor.predict(x_test)
    assert predictions.shape == (len(y_test), 1)


def test_structured_data_classifier(tmp_path):
    """StructuredDataClassifier fits on CSV rows and predicts 3 classes."""
    total_rows = NUM_INSTANCES * 2
    train_rows = NUM_INSTANCES
    csv_frame = pd.read_csv(test_utils.TRAIN_CSV_PATH)
    rows = csv_frame.to_numpy().astype(str)[:total_rows]
    # First half is used for training, second half for prediction.
    x_train, x_test = rows[:train_rows], rows[train_rows:]
    one_hot_labels = test_utils.generate_one_hot_labels(
        num_instances=total_rows, num_classes=3
    )
    y_train = one_hot_labels[:train_rows]
    y_test = one_hot_labels[train_rows:]
    classifier = ak.StructuredDataClassifier(
        directory=tmp_path, max_trials=1, seed=test_utils.SEED
    )
    fit_options = dict(
        epochs=2,
        validation_data=(x_train, y_train),
        batch_size=BATCH_SIZE,
    )
    classifier.fit(x_train, y_train, **fit_options)
    classifier.export_model()
    predictions = classifier.predict(x_test)
    assert predictions.shape == (len(y_test), 3)


================================================
FILE: autokeras/keras_layers.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import keras
from keras import layers
from keras import ops

from autokeras.utils import data_utils

# String identifiers exported by this module; none of them is referenced by
# the layers defined below.
# NOTE(review): presumably encoding-mode names consumed elsewhere — confirm.
INT = "int"
NONE = "none"
ONE_HOT = "one-hot"


class PreprocessingLayer(layers.Layer):
    """Common base class for the preprocessing layers defined in this module.

    Adds no behavior of its own on top of `keras.layers.Layer`.
    """

    pass


@keras.utils.register_keras_serializable()
class CastToFloat32(PreprocessingLayer):
    """Layer that casts its inputs to float32 via `data_utils`."""

    def get_config(self):
        """Return the base layer config; this layer adds no extra fields."""
        base_config = super().get_config()
        return base_config

    def call(self, inputs):
        """Cast `inputs` to float32. Strings are not handled, nor need be."""
        casted = data_utils.cast_to_float32(inputs)
        return casted

    def adapt(self, data):
        """No-op: this layer has nothing to learn from `data`."""
        return None


@keras.utils.register_keras_serializable()
class ExpandLastDim(PreprocessingLayer):
    """Layer that appends a trailing axis to its inputs."""

    def get_config(self):
        """Return the base layer config; this layer adds no extra fields."""
        base_config = super().get_config()
        return base_config

    def call(self, inputs):
        """Return `inputs` with one extra axis inserted at the last position."""
        expanded = ops.expand_dims(inputs, axis=-1)
        return expanded

    def adapt(self, data):
        """No-op: this layer has nothing to learn from `data`."""
        return None


================================================
FILE: autokeras/keras_layers_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np

from autokeras import keras_layers as layer_module


def get_text_data():
    """Return small text fixtures as `(train, test, y)`.

    `train` holds five single-column string rows, `test` holds the first
    three of the same sentences, and `y` is a random array of shape (3, 1).
    """
    shared_rows = [
        ["This is a test example"],
        ["This is another text example"],
        ["Is this another example?"],
    ]
    # Rows that only appear in the training split: an empty string and a
    # longer sentence.
    train_only_rows = [
        [""],
        ["Is this a long long long long long long example?"],
    ]
    train = np.array(shared_rows + train_only_rows, dtype=str)
    test = np.array(shared_rows, dtype=str)
    y = np.random.rand(3, 1)
    return train, test, y


def test_cast_to_float32_return_float32_tensor(tmp_path):
    """CastToFloat32 turns a uint8 array into a float32 tensor."""
    uint8_input = np.array([3], dtype="uint8")

    output = layer_module.CastToFloat32()(uint8_input)

    assert output.numpy().dtype == "float32"


def test_expand_last_dim_return_tensor_with_more_dims(tmp_path):
    """ExpandLastDim turns a 1-D input into a 2-D tensor."""
    vector = np.array([0.1, 0.2], dtype="float32")

    output = layer_module.ExpandLastDim()(vector)

    assert len(output.shape) == 2


================================================
FILE: autokeras/nodes.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict
from typing import List
from typing import Optional

import keras
import tree

from autokeras import adapters
from autokeras import analysers
from autokeras import blocks
from autokeras import hyper_preprocessors
from autokeras import keras_layers
from autokeras import preprocessors
from autokeras.engine import io_hypermodel
from autokeras.engine import node as node_module
from autokeras.utils import utils


def serialize(obj):
    """Serialize `obj` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(obj)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild an object from `config`, resolving names in this module.

    # Arguments
        config: The serialized configuration to restore.
        custom_objects: Optional extra objects forwarded to
            `utils.deserialize_keras_object`.
    """
    # This module's globals are the lookup table for class names.
    lookup = globals()
    return utils.deserialize_keras_object(
        config, module_objects=lookup, custom_objects=custom_objects
    )


@keras.utils.register_keras_serializable(package="autokeras")
class Input(node_module.Node, io_hypermodel.IOHyperModel):
    """Generic input node for tensor data.

    The data is expected to be a numpy.ndarray.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be set
            automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create the `keras.Input` using this node's shape and dtype."""
        placeholder = keras.Input(shape=self.shape, dtype=self.dtype)
        return placeholder

    def build(self, hp, inputs=None):
        """Cast the first tensor found in `inputs` to float32."""
        flat_inputs = tree.flatten(inputs)
        cast_layer = keras_layers.CastToFloat32()
        return cast_layer(flat_inputs[0])

    def get_adapter(self):
        """Return a new `adapters.InputAdapter`."""
        return adapters.InputAdapter()

    def get_analyser(self):
        """Return a new `analysers.InputAnalyser`."""
        return analysers.InputAnalyser()

    def get_block(self):
        """Return a new `blocks.GeneralBlock`."""
        return blocks.GeneralBlock()

    def get_hyper_preprocessors(self):
        """Return an empty list: the generic input has no preprocessors."""
        return list()


class ImageInput(Input):
    """Input node for image data.

    The input data should be numpy.ndarray. The shape of the data should be
    (samples, width, height) or (samples, width, height, channels).

    # Arguments
        name: String. The name of the input node. If unspecified, it will be set
            automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build(self, hp, inputs=None):
        """Run the parent build, then add a last axis to rank-3 tensors."""
        built = super().build(hp, inputs)
        node = tree.flatten(built)[0]
        if len(node.shape) != 3:
            return node
        # Rank 3 means the trailing axis is missing, so append one.
        return keras_layers.ExpandLastDim()(node)

    def get_adapter(self):
        """Return a new `adapters.ImageAdapter`."""
        return adapters.ImageAdapter()

    def get_analyser(self):
        """Return a new `analysers.ImageAnalyser`."""
        return analysers.ImageAnalyser()

    def get_block(self):
        """Return a new `blocks.ImageBlock`."""
        return blocks.ImageBlock()


class TextInput(Input):
    """Input node for text data.

    The input data should be a one-dimensional numpy.ndarray in which each
    element is a string holding a full sentence.

    # Arguments
        name: String. The name of the input node. If unspecified, it will be set
            automatically with the class name.
    """

    def __init__(self, name: Optional[str] = None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build_node(self, hp):
        """Create the `keras.Input` with this node's shape and int32 dtype."""
        placeholder = keras.Input(shape=self.shape, dtype="int32")
        return placeholder

    def build(self, hp, inputs=None):
        """Return the first input tensor, adding a last axis if it is rank 1."""
        node = tree.flatten(inputs)[0]
        if len(node.shape) != 1:
            return node
        return keras_layers.ExpandLastDim()(node)  # pragma: no cover

    def get_adapter(self):
        """Return a new `adapters.TextAdapter`."""
        return adapters.TextAdapter()

    def get_analyser(self):
        """Return a new `analysers.TextAnalyser`."""
        return analysers.TextAnalyser()

    def get_block(self):
        """Return a new `blocks.TextBlock`."""
        return blocks.TextBlock()

    def get_hyper_preprocessors(self):
        """Return the string cast followed by the tokenizer, each wrapped."""
        steps = (preprocessors.CastToString, preprocessors.TextTokenizer)
        return [
            hyper_preprocessors.DefaultHyperPreprocessor(step())
            for step in steps
        ]


class StructuredDataInput(Input):
    """Input node for structured data.

    The input data should be a two-dimensional numpy.ndarray with numerical
    or categorical values.

    # Arguments
        column_names: A list of strings specifying the names of the columns. The
            length of the list should be equal to the number of columns of the
            data. Defaults to None.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of that
            column. Defaults to None. If not None, the column_names need to be
            specified. If None, it will be inferred from the data. A column will
            be judged as categorical if the number of different values is less
            than 5% of the number of instances.
        name: String. The name of the input node. If unspecified, it will be set
            automatically with the class name.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
        name: Optional[str] = None,
        **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.column_names = column_names
        self.column_types = column_types

    def get_config(self):
        """Return the parent config extended with the column metadata."""
        config = super().get_config()
        config["column_names"] = self.column_names
        config["column_types"] = self.column_types
        return config

    def get_adapter(self):
        """Return a new `adapters.StructuredDataAdapter`."""
        return adapters.StructuredDataAdapter()

    def get_analyser(self):
        """Return an analyser seeded with the configured column metadata."""
        analyser = analysers.StructuredDataAnalyser(
            self.column_names, self.column_types
        )
        return analyser

    def get_block(self):
        """Return a new `blocks.StructuredDataBlock`."""
        return blocks.StructuredDataBlock()

    def config_from_analyser(self, analyser):
        """Copy the column names and types back from `analyser`."""
        super().config_from_analyser(analyser)
        # The analyser keeps the user-specified types and infers the rest.
        self.column_types = analyser.column_types
        self.column_names = analyser.column_names

    def build(self, hp, inputs=None):
        """Return `inputs` unchanged."""
        return inputs

    def get_hyper_preprocessors(self):
        """Return the categorical-to-numerical encoder, wrapped in a list."""
        encoder = preprocessors.CategoricalToNumerical(
            column_names=self.column_names,
            column_types=self.column_types,
        )
        return [hyper_preprocessors.DefaultHyperPreprocessor(encoder)]


================================================
FILE: autokeras/nodes_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras import blocks
from autokeras import nodes


def test_input_get_block_return_general_block():
    """The generic Input node hands out a GeneralBlock."""
    block = nodes.Input().get_block()
    assert isinstance(block, blocks.GeneralBlock)


================================================
FILE: autokeras/pipeline.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import tree

from autokeras import preprocessors as preprocessors_module
from autokeras.engine import hyper_preprocessor as hpps_module
from autokeras.engine import preprocessor as pps_module
from autokeras.utils import io_utils


class HyperPipeline(hpps_module.HyperPreprocessor):
    """A search space made of HyperPreprocessors.

    # Arguments
        inputs: a list of lists of HyperPreprocessors.
        outputs: a list of lists of HyperPreprocessors.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def _build_preprocessors(hp, hpps_lists, dataset):
        """Build and fit one chain of preprocessors per flattened source.

        Each preprocessor is built from, and fit on, the data as transformed
        by the preprocessors before it in the same chain.
        """
        built_chains = []
        for data, hyper_chain in zip(tree.flatten(dataset), hpps_lists):
            chain = []
            for hyper_step in hyper_chain:
                step = hyper_step.build(hp, data)
                step.fit(data)
                data = step.transform(data)
                chain.append(step)
            built_chains.append(chain)
        return built_chains

    def build(self, hp, dataset):
        """Build a Pipeline by Hyperparameters.

        # Arguments
            hp: Hyperparameters.
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            An instance of Pipeline.
        """
        x, y = dataset
        input_chains = self._build_preprocessors(hp, self.inputs, x)
        output_chains = self._build_preprocessors(hp, self.outputs, y)
        return Pipeline(inputs=input_chains, outputs=output_chains)


def load_pipeline(filepath, custom_objects=None):
    """Load a Pipeline instance from the JSON config stored at `filepath`.

    # Arguments
        filepath: Path of the JSON file to read.
        custom_objects: Optional objects made available through a Keras
            custom object scope while the config is loaded.

    # Returns
        An instance of Pipeline.
    """
    scope_objects = {} if custom_objects is None else custom_objects
    with keras.utils.custom_object_scope(scope_objects):
        config = io_utils.load_json(filepath)
        return Pipeline.from_config(config)


class Pipeline(pps_module.Preprocessor):
    """A data pipeline for transforming the entire dataset.

    # Arguments
        inputs: A list of lists of Preprocessors. For the input datasets for
            the model.
        outputs: A list of lists of Preprocessors. For the target datasets for
            the model.
    """

    def __init__(self, inputs, outputs, **kwargs):
        super().__init__(**kwargs)
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def _fit_chains(pps_lists, sources):
        """Fit each chain on its source, feeding transformed data forward."""
        for chain, data in zip(pps_lists, sources):
            for step in chain:
                step.fit(data)
                data = step.transform(data)

    def fit(self, dataset):
        """Fit the Preprocessors.

        # Arguments
            dataset: a pair `(x, y)` of nested numpy arrays.
        """
        x, y = dataset
        self._fit_chains(self.inputs, tree.flatten(x))
        self._fit_chains(self.outputs, tree.flatten(y))

    def transform(self, dataset):
        """Transform the dataset to be ready for the model.

        # Arguments
            dataset: nested numpy arrays. The `(x, y)` dataset for the model.

        # Returns
            A tuple `(x, y)` of the transformed inputs and targets.
        """
        x, y = dataset
        transformed_x = self.transform_x(x)
        transformed_y = self.transform_y(y)
        return (transformed_x, transformed_y)

    def transform_x(self, dataset):
        """Transform the input dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The input dataset for the model.

        # Returns
            The transformed input dataset.
        """
        return self._transform_data(dataset, self.inputs)

    def transform_y(self, dataset):
        """Transform the target dataset for the model.

        # Arguments
            dataset: nested numpy arrays. The target dataset for the model.

        # Returns
            The transformed target dataset.
        """
        return self._transform_data(dataset, self.outputs)

    def _transform_data(self, dataset, pps_lists):
        """Apply each chain to its flattened source.

        Returns the single transformed source when there is exactly one,
        otherwise a tuple of all transformed sources.
        """
        results = []
        for chain, data in zip(pps_lists, tree.flatten(dataset)):
            for step in chain:
                data = step.transform(data)
            results.append(data)
        return results[0] if len(results) == 1 else tuple(results)

    def save(self, filepath):
        """Write this pipeline's config to `filepath` as JSON."""
        config = self.get_config()
        io_utils.save_json(filepath, config)

    def get_config(self):
        """Return the serialized input and output preprocessor chains."""

        def dump(chains):
            return [
                [preprocessors_module.serialize(step) for step in chain]
                for chain in chains
            ]

        return {"inputs": dump(self.inputs), "outputs": dump(self.outputs)}

    @classmethod
    def from_config(cls, config):
        """Rebuild a pipeline from a config produced by `get_config`."""

        def restore(chains):
            return [
                [preprocessors_module.deserialize(step) for step in chain]
                for chain in chains
            ]

        return cls(
            inputs=restore(config["inputs"]),
            outputs=restore(config["outputs"]),
        )

    def postprocess(self, y):
        """Postprocess the outputs of the model.

        # Arguments
            y: numpy.ndarray or a list of numpy.ndarrays. The output of the
                Keras model.

        # Returns
            A list or an instance of numpy.ndarray. The postprocessed data for
            the heads.
        """
        results = []
        for data, chain in zip(tree.flatten(y), self.outputs):
            # Undo the target preprocessors in the reverse of the order in
            # which they were applied; other preprocessors are skipped.
            for step in reversed(chain):
                if isinstance(step, pps_module.TargetPreprocessor):
                    data = step.postprocess(data)
            results.append(data)
        return results[0] if len(results) == 1 else results


================================================
FILE: autokeras/pipeline_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import pipeline as pipeline_module
from autokeras import preprocessors


def test_pipeline_postprocess_one_hot_to_labels():
    """A single one-hot output is decoded back to its labels."""
    encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    pipeline = pipeline_module.Pipeline(inputs=[[]], outputs=[[encoder]])

    decoded = pipeline.postprocess(np.eye(3))

    assert np.array_equal(decoded, [["a"], ["b"], ["c"]])


def test_pipeline_postprocess_multiple_one_hot_to_labels():
    """Two one-hot outputs are each decoded back to their labels."""
    first_encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    second_encoder = preprocessors.OneHotEncoder(["a", "b", "c"])
    pipeline = pipeline_module.Pipeline(
        inputs=[[]],
        outputs=[[first_encoder], [second_encoder]],
    )

    result = pipeline.postprocess([np.eye(3), np.eye(3)])

    expected = [["a"], ["b"], ["c"]]
    for index in (0, 1):
        assert np.array_equal(result[index], expected)


================================================
FILE: autokeras/preprocessors/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras

from autokeras.preprocessors.common import AddOneDimension
from autokeras.preprocessors.common import CastToInt32
from autokeras.preprocessors.common import CastToString
from autokeras.preprocessors.common import TextTokenizer
from autokeras.preprocessors.encoders import CategoricalToNumerical
from autokeras.preprocessors.encoders import LabelEncoder
from autokeras.preprocessors.encoders import OneHotEncoder
from autokeras.preprocessors.postprocessors import SigmoidPostprocessor
from autokeras.preprocessors.postprocessors import SoftmaxPostprocessor
from autokeras.utils import utils


def serialize(preprocessor):
    """Serialize `preprocessor` with `utils.serialize_keras_object`."""
    serialized = utils.serialize_keras_object(preprocessor)
    return serialized


def deserialize(config, custom_objects=None):
    """Rebuild a preprocessor from `config`, resolving names in this package.

    # Arguments
        config: The serialized configuration to restore.
        custom_objects: Optional extra objects forwarded to
            `utils.deserialize_keras_object`.
    """
    # The names imported into this package are the lookup table.
    lookup = globals()
    return utils.deserialize_keras_object(
        config, module_objects=lookup, custom_objects=custom_objects
    )


================================================
FILE: autokeras/preprocessors/common.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import Counter

import keras
import numpy as np

from autokeras.engine import preprocessor


@keras.utils.register_keras_serializable(package="autokeras")
class AddOneDimension(preprocessor.Preprocessor):
    """Append one dimension of size one to the dataset shape."""

    def transform(self, dataset):
        """Return `dataset` with a new trailing axis."""
        expanded = np.expand_dims(dataset, axis=-1)
        return expanded


@keras.utils.register_keras_serializable(package="autokeras")
class CastToInt32(preprocessor.Preprocessor):
    """Cast the dataset values to int32."""

    def transform(self, dataset):
        """Return `dataset` converted with `astype("int32")`."""
        casted = dataset.astype("int32")
        return casted


@keras.utils.register_keras_serializable(package="autokeras")
class CastToString(preprocessor.Preprocessor):
    """Cast the dataset values to strings.

    Byte-string arrays are decoded as UTF-8, dropping bytes that cannot be
    decoded; every other dtype is converted with `astype("str")`.
    """

    def transform(self, dataset):
        """Return `dataset` as an array of unicode strings.

        # Arguments
            dataset: numpy.ndarray. The data to convert.

        # Returns
            numpy.ndarray of strings with the same shape as `dataset`.
        """
        if np.issubdtype(dataset.dtype, np.bytes_):
            # Decode element-wise so the array shape is kept and the result
            # always has a unicode dtype, including for empty or
            # multi-dimensional input.
            return np.char.decode(dataset, "utf-8", errors="ignore")
        return dataset.astype("str")


@keras.utils.register_keras_serializable(package="autokeras")
class TextTokenizer(preprocessor.Preprocessor):
    """Whitespace tokenizer that maps strings to fixed-length id sequences.

    # Arguments
        max_len: Int. Length of every output sequence. Defaults to 100.
        vocab: Dict mapping words to integer ids, or None until `fit` is
            called. Defaults to None.
        max_vocab: Int. Maximum number of words kept in the vocabulary.
            Defaults to 500.
    """

    def __init__(self, max_len=100, vocab=None, max_vocab=500, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab = vocab
        self.max_vocab = max_vocab

    def fit(self, dataset):
        """Build the vocabulary from the most frequent words in `dataset`."""
        counts = Counter()
        for text in dataset:
            counts.update(text.split())
        # Most frequent first; ties keep the order of first appearance.
        kept = counts.most_common()[: self.max_vocab]
        # Ids start at 1 because 0 is reserved for padding.
        self.vocab = {
            word: rank for rank, (word, _) in enumerate(kept, start=1)
        }

    def transform(self, dataset):
        """Convert each string to `max_len` int32 ids.

        Words missing from the vocabulary map to 0, sequences are truncated
        to `max_len` words and shorter ones are right-padded with 0.
        """
        rows = []
        for text in dataset:
            tokens = text.split()[: self.max_len]
            ids = [self.vocab.get(token, 0) for token in tokens]
            ids.extend([0] * (self.max_len - len(ids)))
            rows.append(ids)
        return np.array(rows, dtype=np.int32)

    def get_config(self):
        """Return the parent config extended with the tokenizer settings."""
        config = super().get_config()
        config.update(
            max_len=self.max_len,
            vocab=self.vocab,
            max_vocab=self.max_vocab,
        )
        return config


================================================
FILE: autokeras/preprocessors/common_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import test_utils
from autokeras.preprocessors import common


def test_cast_to_int32_return_int32():
    """CastToInt32 turns a uint8 label matrix into an int32 one."""
    labels = test_utils.generate_one_hot_labels(100, 10).astype("uint8")
    converted = common.CastToInt32().transform(labels)
    assert converted.dtype == "int32"


def test_cast_to_string_with_bytes():
    """CastToString turns byte strings into text that equals the source."""
    raw = np.array([b"hello", b"world"])
    result = common.CastToString().transform(raw)
    # The result may be a unicode ("U") or byte-string ("S") array.
    assert result.dtype.kind in ("U", "S")
    for position, expected in enumerate(("hello", "world")):
        assert result[position] == expected


def test_cast_to_string_with_strings():
    """CastToString leaves values that are already strings intact."""
    texts = np.array(["hello", "world"])
    result = common.CastToString().transform(texts)
    assert result.dtype.kind in ("U", "S")
    for position, expected in enumerate(("hello", "world")):
        assert result[position] == expected


def test_text_tokenizer_vocab_limit():
    """The fitted vocabulary keeps only the `max_vocab` most frequent words."""
    x = np.array(["word1 word2 word3", "word1 word4 word5"])
    tokenizer = common.TextTokenizer(max_vocab=2)
    tokenizer.fit(x)
    # Five distinct words are seen but only max_vocab=2 may be kept. Index 0
    # is never stored in the vocab because it stands for padding/unknown.
    assert len(tokenizer.vocab) == 2
    assert 0 not in tokenizer.vocab.values()
    # word1 is the only word occurring twice, so it must get the first index.
    assert "word1" in tokenizer.vocab
    assert tokenizer.vocab["word1"] == 1


def test_text_tokenizer_transform():
    """Transformed texts are padded int32 index sequences."""
    texts = np.array(["hello world", "hello"])
    tokenizer = common.TextTokenizer(max_vocab=10)
    tokenizer.fit(texts)

    encoded = tokenizer.transform(texts)

    expected_first_token = tokenizer.vocab.get("hello", 0)
    assert encoded.shape == (2, 100)  # max_len=100
    assert encoded.dtype == np.int32
    assert encoded[0][0] == expected_first_token


================================================
FILE: autokeras/preprocessors/encoders.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import keras
import numpy as np

from autokeras import analysers
from autokeras.engine import preprocessor


@keras.utils.register_keras_serializable(package="autokeras")
class Encoder(preprocessor.TargetPreprocessor):
    """Transform labels to encodings.

    Labels are compared as strings: bytes are decoded as UTF-8 and any other
    value is converted with `str()`, both when the encoder is created and when
    data is transformed.

    # Arguments
        labels: A list of labels of any type. The labels to be encoded.
    """

    def __init__(self, labels, **kwargs):
        super().__init__(**kwargs)
        self.labels = [self._to_text(label) for label in labels]

    @staticmethod
    def _to_text(label, errors="strict"):
        """Return the canonical string form of a single label.

        # Arguments
            label: The label to convert.
            errors: String. The error handling scheme passed to
                `bytes.decode` when `label` is a bytes object.

        # Returns
            String. The UTF-8 decoded text for bytes, `str(label)` otherwise.
        """
        if isinstance(label, bytes):
            return label.decode("utf-8", errors)
        return str(label)

    def get_config(self):
        """Return the constructor arguments of the encoder as a dict."""
        return {"labels": self.labels}

    def fit(self, dataset):
        """Do nothing; the labels are supplied at construction time."""
        return

    def transform(self, dataset):
        """Transform labels to integer encodings.

        A label that is not among `self.labels` is encoded as
        `len(self.labels)`.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        label_to_idx = {label: idx for idx, label in enumerate(self.labels)}
        unknown_idx = len(self.labels)
        return np.array(
            [
                # Normalize exactly as in __init__ so that bytes values match
                # their decoded labels (str(b"a") would give "b'a'"). Bytes
                # that cannot be decoded are replaced rather than raising, so
                # they fall through to the unknown index.
                label_to_idx.get(
                    self._to_text(label, errors="replace"), unknown_idx
                )
                for label in dataset.flatten()
            ]
        ).reshape(dataset.shape)


@keras.utils.register_keras_serializable(package="autokeras")
class OneHotEncoder(Encoder):
    """Transform the labels to one-hot encodings."""

    def transform(self, dataset):
        """Transform labels to one-hot encodings.

        A label that is not among `self.labels` is encoded as an all-zero row.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        num_labels = len(self.labels)
        indices = super().transform(dataset).squeeze(axis=-1)
        identity = np.eye(num_labels)
        one_hot = np.zeros((len(indices), num_labels))
        for row, label_idx in enumerate(indices):
            # Out-of-range indices mark unknown labels; leave their row zero.
            if label_idx >= num_labels:
                continue
            one_hot[row] = identity[label_idx]
        return one_hot

    def postprocess(self, data):
        """Transform probabilities back to labels.

        The label of each row is the one at the position of the row's largest
        value; positions beyond the known labels decode to "unknown".

        # Arguments
            data: numpy.ndarray. The output probabilities of the classification
                head.

        # Returns
            numpy.ndarray. The original labels.
        """
        winners = np.argmax(np.array(data), axis=1)
        decoded = [
            self.labels[winner] if winner < len(self.labels) else "unknown"
            for winner in winners
        ]
        return np.array(decoded).reshape(-1, 1)


@keras.utils.register_keras_serializable(package="autokeras")
class LabelEncoder(Encoder):
    """Transform the labels to integer encodings."""

    def transform(self, dataset):
        """Transform labels to integer encodings.

        # Arguments
            dataset: numpy.ndarray. The dataset to be transformed.

        # Returns
            numpy.ndarray. The transformed dataset.
        """
        dataset = super().transform(dataset)
        return dataset

    def postprocess(self, data):
        """Transform predictions back to labels.

        The first value of every row is rounded to the nearest integer and
        used as an index into `self.labels`. An index outside
        `[0, len(self.labels))` decodes to "unknown".

        # Arguments
            data: numpy.ndarray. The output probabilities of the classification
                head.

        # Returns
            numpy.ndarray. The original labels.
        """

        def decode(row):
            idx = int(round(row[0]))
            # Reject negative indices explicitly: Python would otherwise wrap
            # them around and return a label from the end of the list.
            if 0 <= idx < len(self.labels):
                return self.labels[idx]
            return "unknown"

        return np.array([decode(row) for row in np.array(data)]).reshape(-1, 1)


@keras.utils.register_keras_serializable()
class CategoricalToNumerical(preprocessor.Preprocessor):
    """Encode the categorical features to numerical features.

    Columns typed as `analysers.CATEGORICAL` are integer-encoded with a
    `LabelEncoder` built from the unique values of the column. Every other
    column is cast to float32 with NaN replaced by 0.

    # Arguments
        column_names: A list of strings specifying the names of the columns. The
            length of the list should be equal to the number of columns of the
            data.
        column_types: Dict. The keys are the column names. The values should
            either be 'numerical' or 'categorical', indicating the type of that
            column. It must contain an entry for every name in `column_names`.
    """

    # NOTE(review): unlike the encoders above, this class is registered
    # without package="autokeras"; changing that would alter the name it is
    # serialized under, so it is left as is.

    # TODO: Support one-hot encoding.
    # TODO: Support frequency encoding.

    def __init__(self, column_names, column_types, **kwargs):
        super().__init__(**kwargs)
        self.column_names = column_names
        self.column_types = column_types
        # One entry per column: "int" for categorical columns, "none" for
        # columns that are passed through as numbers.
        self.encoding = []
        for column_name in self.column_names:
            column_type = self.column_types[column_name]
            if column_type == analysers.CATEGORICAL:
                # TODO: Search to use one-hot or int.
                self.encoding.append("int")
            else:
                self.encoding.append("none")
        # Filled in by fit(); stays None for columns that are not encoded.
        self.encoders = [None] * len(self.encoding)

    def fit(self, dataset):
        """Create one encoder per categorical column.

        # Arguments
            dataset: numpy.ndarray. The 2-D data whose columns follow the order
                of `column_names`.

        # Raises
            ValueError: If a column has an unsupported encoding type.
        """
        for i, enc_type in enumerate(self.encoding):
            if enc_type == "none":
                continue
            column_data = dataset[:, i]
            unique_labels = np.unique(column_data)
            if enc_type == "int":
                self.encoders[i] = LabelEncoder(unique_labels)
            elif enc_type == "one_hot":  # pragma: no cover
                self.encoders[i] = OneHotEncoder(  # pragma: no cover
                    unique_labels
                )
            else:
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoding: {enc_type}"
                )

    def transform(self, dataset):
        """Encode the categorical columns and impute the numerical ones.

        # Arguments
            dataset: numpy.ndarray. The 2-D data whose columns follow the order
                of `column_names`.

        # Returns
            numpy.ndarray of dtype float32. The column-wise concatenation of
            the processed columns.
        """
        outputs = []
        for i, enc_type in enumerate(self.encoding):
            # Slice with i : i + 1 to keep the column two-dimensional.
            column_data = dataset[:, i : i + 1]
            if enc_type == "none":
                column_data = column_data.astype("float32")
                # Replace NaN with 0.
                imputed = np.where(np.isnan(column_data), 0, column_data)
                outputs.append(imputed)
            else:
                encoded = self.encoders[i].transform(column_data)
                outputs.append(encoded)
        return np.concatenate(outputs, axis=1).astype("float32")

    def get_config(self):
        """Return a dict holding the column setup and every encoder's config.

        Each encoder is stored as a dict with an `encoder_type` entry
        ("label", "one_hot" or None) and an `encoder_config` entry.
        """
        encoders_config = []
        for enc in self.encoders:
            if enc is None:
                encoders_config.append(
                    {"encoder_type": None, "encoder_config": None}
                )
            else:
                encoder_type = (
                    "label" if isinstance(enc, LabelEncoder) else "one_hot"
                )
                encoders_config.append(
                    {
                        "encoder_type": encoder_type,
                        "encoder_config": enc.get_config(),
                    }
                )
        return {
            "column_types": self.column_types,
            "column_names": self.column_names,
            "encoding": self.encoding,
            "encoders": encoders_config,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild the preprocessor from the output of `get_config`.

        # Arguments
            config: Dict. The config produced by `get_config`.

        # Returns
            A `CategoricalToNumerical` instance with its encoders restored.

        # Raises
            ValueError: If an encoder entry has an unsupported type.
        """
        obj = cls(config["column_names"], config["column_types"])
        obj.encoding = config["encoding"]
        obj.encoders = []
        for item in config["encoders"]:
            if item["encoder_type"] is None:
                obj.encoders.append(None)
            elif item["encoder_type"] == "label":
                obj.encoders.append(LabelEncoder(**item["encoder_config"]))
            elif item["encoder_type"] == "one_hot":  # pragma: no cover
                obj.encoders.append(  # pragma: no cover
                    OneHotEncoder(**item["encoder_config"])
                )
            else:
                # Skipping the entry would shift every later encoder onto the
                # wrong column, so fail loudly as fit() does.
                raise ValueError(  # pragma: no cover
                    f"Unsupported encoder type: {item['encoder_type']}"
                )
        return obj


================================================
FILE: autokeras/preprocessors/encoders_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import preprocessors
from autokeras.preprocessors import encoders
from autokeras.utils import data_utils


def test_one_hot_encoder_deserialize_transforms_to_np():
    """A serialized-then-restored OneHotEncoder still one-hot encodes."""
    original = encoders.OneHotEncoder(["a", "b", "c"])
    original.fit(np.array(["a", "b", "a"]))

    restored = preprocessors.deserialize(preprocessors.serialize(original))
    encoded = restored.transform(np.array([["a"], ["c"], ["b"]]))

    # One column per known label.
    assert encoded.shape[1:] == (3,)


def test_one_hot_encoder_decode_to_same_string():
    """Postprocessing an identity matrix yields the labels in order."""
    expected = np.array([["a"], ["b"], ["c"]])

    decoded = encoders.OneHotEncoder(["a", "b", "c"]).postprocess(np.eye(3))

    assert np.array_equal(decoded, expected)


def test_label_encoder_decode_to_same_string():
    """Postprocessing integer predictions yields the matching labels."""
    expected = np.array([["a"], ["b"]])

    decoded = encoders.LabelEncoder(["a", "b"]).postprocess([[0], [1]])

    assert np.array_equal(decoded, expected)


def test_label_encoder_encode_to_correct_shape():
    """Encoding a column of labels keeps the (samples, 1) shape."""
    encoder = encoders.LabelEncoder(["a", "b"])
    dataset = np.array([["a"], ["b"]])

    result = encoder.transform(dataset)

    # The leftover debugging print of the shape was removed; the assertion
    # already reports the mismatch when it fails.
    assert np.array_equal(data_utils.dataset_shape(result), [2, 1])


================================================
FILE: autokeras/preprocessors/postprocessors.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import numpy as np

from autokeras.engine import preprocessor


@keras.utils.register_keras_serializable(package="autokeras")
class PostProcessor(preprocessor.TargetPreprocessor):
    """Target preprocessor whose `transform` leaves the data untouched.

    Subclasses in this module add a `postprocess` method for model outputs.
    """

    def transform(self, dataset):
        """Return `dataset` unchanged."""
        return dataset


@keras.utils.register_keras_serializable(package="autokeras")
class SigmoidPostprocessor(PostProcessor):
    """Postprocessor for sigmoid outputs."""

    def postprocess(self, data):
        """Transform probabilities to zeros and ones.

        Values strictly greater than 0.5 become 1 and all other values,
        including exactly 0.5, become 0. The input array is not modified.

        # Arguments
            data: numpy.ndarray. The output probabilities of the classification
                head.

        # Returns
            numpy.ndarray. The zeros and ones predictions, with the same shape
            and dtype as `data`.
        """
        data = np.asarray(data)
        # A single comparison covers every value, so 0.5 itself can no longer
        # slip through unchanged, and a new array is built instead of
        # overwriting the caller's predictions.
        return (data > 0.5).astype(data.dtype)


@keras.utils.register_keras_serializable(package="autokeras")
class SoftmaxPostprocessor(PostProcessor):
    """Postprocessor for softmax outputs."""

    def postprocess(self, data):
        """Transform probabilities to zeros and ones.

        Each row gets a 1 at the position of its largest value and 0
        everywhere else.

        # Arguments
            data: numpy.ndarray. The output probabilities of the classification
                head.

        # Returns
            numpy.ndarray. The zeros and ones predictions.
        """
        winners = np.argmax(data, axis=-1)
        one_hot = np.zeros(data.shape)
        rows = np.arange(one_hot.shape[0])
        one_hot[rows, winners] = 1
        return one_hot


================================================
FILE: autokeras/preprocessors/postprocessors_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from autokeras import preprocessors
from autokeras.preprocessors import postprocessors


def test_sigmoid_postprocess_to_zero_one():
    """Sigmoid postprocessing of random values yields only zeros and ones."""
    probabilities = np.random.rand(10, 3)

    y = postprocessors.SigmoidPostprocessor().postprocess(probabilities)

    assert set(y.flatten().tolist()) == {0, 1}


def test_sigmoid_deserialize_without_error():
    """A SigmoidPostprocessor survives a serialize/deserialize round trip."""
    dataset = np.array([1, 2])

    restored = preprocessors.deserialize(
        preprocessors.serialize(postprocessors.SigmoidPostprocessor())
    )

    assert isinstance(restored.transform(dataset), np.ndarray)


def test_softmax_postprocess_to_zero_one():
    """Softmax postprocessing of random values yields only zeros and ones."""
    probabilities = np.random.rand(10, 3)

    y = postprocessors.SoftmaxPostprocessor().postprocess(probabilities)

    assert set(y.flatten().tolist()) == {0, 1}


def test_softmax_transform_dataset_doesnt_change():
    """SoftmaxPostprocessor.transform hands back a numpy array."""
    dataset = np.array([1, 2])

    transformed = postprocessors.SoftmaxPostprocessor().transform(dataset)

    assert isinstance(transformed, np.ndarray)


def test_softmax_deserialize_without_error():
    """A SoftmaxPostprocessor survives a serialize/deserialize round trip."""
    dataset = np.array([1, 2])

    restored = preprocessors.deserialize(
        preprocessors.serialize(postprocessors.SoftmaxPostprocessor())
    )

    assert isinstance(restored.transform(dataset), np.ndarray)


================================================
FILE: autokeras/tasks/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.tasks.image import ImageClassifier
from autokeras.tasks.image import ImageRegressor
from autokeras.tasks.structured_data import StructuredDataClassifier
from autokeras.tasks.structured_data import StructuredDataRegressor
from autokeras.tasks.text import TextClassifier
from autokeras.tasks.text import TextRegressor


================================================
FILE: autokeras/tasks/image.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

import keras

from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import greedy
from autokeras.tuners import task_specific
from autokeras.utils import types


class SupervisedImagePipeline(auto_model.AutoModel):
    """AutoModel whose input is a single `ImageInput` node."""

    def __init__(self, outputs, **kwargs):
        image_input = input_module.ImageInput()
        super().__init__(inputs=image_input, outputs=outputs, **kwargs)


class ImageClassifier(SupervisedImagePipeline):
    """AutoKeras image classification class.

    # Arguments
        num_classes: Int. Defaults to None, in which case it is inferred from
            the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. By default 'binary_crossentropy' or
            'categorical_crossentropy' is used, depending on the number of
            classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        project_name: String. The name of the AutoModel.
            Defaults to 'image_classifier'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, a task specific tuner
            is used, which first evaluates the most commonly used models for
            the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: types.LossType = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "image_classifier",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the image-classification specific tuner when the caller
        # did not choose one.
        selected_tuner = (
            task_specific.ImageClassifierTuner if tuner is None else tuner
        )
        head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x: Optional[types.DatasetType] = None,
        y: Optional[types.DatasetType] = None,
        epochs: Optional[int] = None,
        callbacks: Optional[List[keras.callbacks.Callback]] = None,
        validation_split: Optional[float] = 0.2,
        validation_data: Union[
            Tuple[types.DatasetType, types.DatasetType], None
        ] = None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is chosen according to its performance on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The shape
                of the data should be (samples, width, height) or (samples,
                width, height, channels).
            y: numpy.ndarray. Training data y. It can be
                raw labels, one-hot encoded if more than two classes, or binary
                encoded for binary classification.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, each model is trained for at most
                1000 epochs, and training stops once the validation loss has
                not improved for 10 epochs (unless an EarlyStopping callback
                is passed in the callbacks argument, in which case that
                callback determines early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. Fraction
                of the training data to be used as validation data. The model
                will set apart this fraction of the training data, will not
                train on it, and will evaluate the loss and any model metrics on
                this data at the end of each epoch. The validation data is
                selected from the last samples in the `x` and `y` data provided,
                before shuffling. This argument is not supported when `x` is a
                dataset. The best model found would be fit on the entire dataset
                including the validation data.
            validation_data: Data on which to evaluate the loss and any model
                metrics at the end of each epoch. The model will not be trained
                on this data. `validation_data` will override
                `validation_split`. The type of the validation data should be
                the same as the training data. The best model found would be
                fit on the training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).
        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training loss
                values and metrics values at successive epochs, as well as
                validation loss values and validation metrics values (if
                applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )


class ImageRegressor(SupervisedImagePipeline):
    """AutoKeras image regression class.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'.
        project_name: String. The name of the AutoModel.
            Defaults to 'image_regressor'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. The path to a directory for storing the search
            outputs. Defaults to None, which would create a folder with the
            name of the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, the greedy tuner is
            used.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "image_regressor",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Regression uses the greedy tuner when the caller did not choose one.
        if tuner is None:
            tuner = greedy.Greedy
        super().__init__(
            outputs=blocks.RegressionHead(
                output_dim=output_dim, loss=loss, metrics=metrics
            ),
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x: Optional[types.DatasetType] = None,
        y: Optional[types.DatasetType] = None,
        epochs: Optional[int] = None,
        callbacks: Optional[List[keras.callbacks.Callback]] = None,
        validation_split: Optional[float] = 0.2,
        # The tuple form holds two datasets, matching ImageClassifier.fit;
        # `Tuple[types.DatasetType]` would describe a 1-element tuple.
        validation_data: Union[
            types.DatasetType,
            Tuple[types.DatasetType, types.DatasetType],
            None,
        ] = None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The shape
                of the data should be (samples, width, height) or (samples,
                width, height, channels).
            y: numpy.ndarray. Training data y. The targets
                passing to the head would have to be np.ndarray. It can be
                single-column or multi-column. The values should all be
                numerical.
            epochs: Int. The number of epochs to train each model during the
                search. If unspecified, by default we train for a maximum of
                1000 epochs, but we stop training if the validation loss stops
                improving for 10 epochs (unless you specified an EarlyStopping
                callback as part of the callbacks argument, in which case the
                EarlyStopping callback you specified will determine early
                stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. Fraction
                of the training data to be used as validation data. The model
                will set apart this fraction of the training data, will not
                train on it, and will evaluate the loss and any model metrics on
                this data at the end of each epoch. The validation data is
                selected from the last samples in the `x` and `y` data provided,
                before shuffling. This argument is not supported when `x` is a
                dataset. The best model found would be fit on the entire
                dataset including the validation data.
            validation_data: Data on which to evaluate the loss and any model
                metrics at the end of each epoch. The model will not be trained
                on this data. `validation_data` will override
                `validation_split`. The type of the validation data should be
                the same as the training data. The best model found would be
                fit on the training dataset without the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).
        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute is a record of training
                loss values and metrics values at successive epochs, as well as
                validation loss values and validation metrics values (if
                applicable).
        """
        history = super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )
        return history


================================================
FILE: autokeras/tasks/image_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import autokeras as ak
from autokeras import test_utils


@mock.patch("autokeras.AutoModel.fit")
def test_img_clf_fit_call_auto_model_fit(fit, tmp_path):
    """ImageClassifier.fit must delegate to the patched AutoModel.fit."""
    auto_model = ak.ImageClassifier(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=10),
    )

    # `is_called` is not a Mock attribute: reading it auto-creates a truthy
    # child mock, so that assertion could never fail. `called` is the flag
    # that records whether the mock was actually invoked.
    assert fit.called


@mock.patch("autokeras.AutoModel.fit")
def test_img_reg_fit_call_auto_model_fit(fit, tmp_path):
    """ImageRegressor.fit must delegate to the patched AutoModel.fit."""
    auto_model = ak.ImageRegressor(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # `is_called` is not part of the Mock API: accessing it auto-creates a
    # child mock that is always truthy, so asserting on it can never fail.
    # `called` is the real flag recording whether the mock was invoked.
    assert fit.called


================================================
FILE: autokeras/tasks/structured_data.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pathlib
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union

import tree

from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import task_specific
from autokeras.utils import types


class BaseStructuredDataPipeline(auto_model.AutoModel):
    """AutoModel base shared by the structured data tasks.

    The column type specification of the input node is validated when the
    pipeline is constructed, and the agreement between column names and
    column types is validated every time `fit` is called.
    """

    def __init__(self, inputs, outputs, **kwargs):
        # Validate before the AutoModel is built so that an invalid
        # specification fails early.
        self.check(inputs.column_names, inputs.column_types)
        super().__init__(inputs=inputs, outputs=outputs, **kwargs)
        self._target_col_name = None

    def check(self, column_names, column_types):
        """Validate the values of the column type specification.

        # Arguments
            column_names: Accepted for the caller's convenience; it is not
                inspected by this check.
            column_types: Dict or None. Maps a column name to its type. An
                empty or None value skips the validation.

        # Raises
            ValueError: If a type is neither "categorical" nor "numerical".
        """
        if not column_types:
            return
        allowed_types = ["categorical", "numerical"]
        for declared_type in column_types.values():
            if declared_type in allowed_types:
                continue
            message = (
                'column_types should be either "categorical" '
                'or "numerical", but got {name}'
            ).format(name=declared_type)
            raise ValueError(message)

    def check_in_fit(self, x):
        """Ensure every typed column is also listed in the column names.

        Only the first flattened input node is examined, and only when both
        its `column_names` and `column_types` are set.

        # Arguments
            x: The training data given to `fit`. It is not inspected here.

        # Raises
            ValueError: If a key of `column_types` is missing from
                `column_names`.
        """
        node = tree.flatten(self.inputs)[0]
        if not (node.column_names and node.column_types):
            return
        for typed_column in node.column_types:
            if typed_column in node.column_names:
                continue
            raise ValueError(
                "column_names and column_types are "
                "mismatched. Cannot find column name "
                "{name} in the data.".format(name=typed_column)
            )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The column specification is checked first, then the call is
        forwarded to the parent `fit`.

        # Arguments
            x: numpy.ndarray.
                Training data x. It can be string or numerical data.
            y: numpy.ndarray. Training data y.
                It can be raw labels, one-hot encoded if more than two classes,
                or binary encoded for binary classification.
            epochs: Int. Number of epochs used to train each model during
                the search. When unspecified, 1000 epochs are used together
                with early stopping with patience equal to 10.
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. The
                fraction of the training data held out as validation data.
                The model does not train on the held-out part and evaluates
                the loss and any model metrics on it at the end of each
                epoch. The held-out samples are the last ones in the
                provided `x` and `y`, before shuffling. Not supported when
                `x` is a dataset.
            validation_data: Data used to evaluate the loss and any model
                metrics at the end of each epoch. The model is not trained
                on it. `validation_data` overrides `validation_split`. Its
                type should be the same as the training data. The best
                model found would be fit on the training dataset without
                the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).
        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records the training loss and
                metrics values at successive epochs, as well as the
                validation loss and validation metrics values (if
                applicable).
        """
        self.check_in_fit(x)

        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )

    def predict(self, x, **kwargs):
        """Predict the output for the given testing data.

        # Arguments
            x: numpy.ndarray.
                Testing data x. It can be string or numerical data.
            **kwargs: Any arguments supported by keras.Model.predict.

        # Returns
            A list of numpy.ndarray objects or a single numpy.ndarray
            holding the predicted results.
        """
        predictions = super().predict(x=x, **kwargs)
        return predictions

    def evaluate(self, x, y=None, **kwargs):
        """Evaluate the best model on the given data.

        # Arguments
            x: numpy.ndarray.
                Testing data x. It can be string or numerical data.
            y: numpy.ndarray. Testing data y.
                It can be string labels or numerical values.
            **kwargs: Any arguments supported by keras.Model.evaluate.

        # Returns
            Scalar test loss (if the model has a single output and no metrics)
            or list of scalars (if the model has multiple outputs and/or
            metrics). The attribute model.metrics_names will give you the
            display labels for the scalar outputs.
        """
        results = super().evaluate(x=x, y=y, **kwargs)
        return results


class SupervisedStructuredDataPipeline(BaseStructuredDataPipeline):
    """Structured data pipeline that creates its own StructuredDataInput.

    The input node is built from `column_names` and `column_types`; all
    remaining keyword arguments are forwarded to the base pipeline.
    """

    def __init__(self, outputs, column_names, column_types, **kwargs):
        structured_input = input_module.StructuredDataInput(
            column_names=column_names, column_types=column_types
        )
        super().__init__(inputs=structured_input, outputs=outputs, **kwargs)


class StructuredDataClassifier(SupervisedStructuredDataPipeline):
    """AutoKeras structured data classification class.

    # Arguments
        column_names: A list of strings naming the columns. Its length
            should equal the number of columns of the data excluding the
            target column. Defaults to None.
        column_types: Dict. Keys are column names; each value should be
            either 'numerical' or 'categorical', giving the type of that
            column. Defaults to None. If not None, the column_names need to
            be specified. If None, it will be inferred from the data.
        num_classes: Int. Defaults to None. If None, it will be inferred from
            the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. Defaults to use 'binary_crossentropy' or
            'categorical_crossentropy' based on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        project_name: String. The name of the AutoModel. Defaults to
            'structured_data_classifier'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. Path of the directory storing the search outputs.
            Defaults to None, which would create a folder with the name of
            the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize. Defaults to 'val_accuracy'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, a task specific
            tuner is used, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict] = None,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: Optional[types.LossType] = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "structured_data_classifier",
        max_trials: int = 100,
        directory: Optional[Union[str, pathlib.Path]] = None,
        objective: str = "val_accuracy",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the tuner dedicated to this task when none is given.
        selected_tuner = (
            task_specific.StructuredDataClassifierTuner
            if tuner is None
            else tuner
        )
        head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=head,
            column_names=column_names,
            column_types=column_types,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        # Arguments
            x: numpy.ndarray. Training data x.
            y: numpy.ndarray. Training data y.
            epochs: Int. Number of epochs used to train each model during
                the search. When unspecified, 1000 epochs are used together
                with early stopping with patience equal to 10.
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. The
                fraction of the training data held out as validation data.
                The model does not train on the held-out part and evaluates
                the loss and any model metrics on it at the end of each
                epoch. The held-out samples are the last ones in the
                provided `x` and `y`, before shuffling. Not supported when
                `x` is a dataset.
            validation_data: Data used to evaluate the loss and any model
                metrics at the end of each epoch. The model is not trained
                on it. `validation_data` overrides `validation_split`. Its
                type should be the same as the training data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).
        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records the training loss and
                metrics values at successive epochs, as well as the
                validation loss and validation metrics values (if
                applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )


class StructuredDataRegressor(SupervisedStructuredDataPipeline):
    """AutoKeras structured data regression class.

    # Arguments
        column_names: A list of strings naming the columns. Its length
            should equal the number of columns of the data excluding the
            target column. Defaults to None.
        column_types: Dict. Keys are column names; each value should be
            either 'numerical' or 'categorical', giving the type of that
            column. Defaults to None. If not None, the column_names need to
            be specified. If None, it will be inferred from the data.
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'.
        project_name: String. The name of the AutoModel. Defaults to
            'structured_data_regressor'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. Path of the directory storing the search outputs.
            Defaults to None, which would create a folder with the name of
            the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, a task specific
            tuner is used, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        column_names: Optional[List[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "structured_data_regressor",
        max_trials: int = 100,
        directory: Union[str, pathlib.Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the tuner dedicated to this task when none is given.
        selected_tuner = (
            task_specific.StructuredDataRegressorTuner
            if tuner is None
            else tuner
        )
        head = blocks.RegressionHead(
            output_dim=output_dim, loss=loss, metrics=metrics
        )
        super().__init__(
            outputs=head,
            column_names=column_names,
            column_types=column_types,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )


================================================
FILE: autokeras/tasks/structured_data_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import numpy as np
import pandas as pd
import pytest

import autokeras as ak
from autokeras import test_utils


def test_raise_error_unknown_str_in_col_type(tmp_path):
    """An unsupported column type string is rejected at construction."""
    invalid_column_types = {"age": "num", "parch": "categorical"}

    with pytest.raises(ValueError) as exc_info:
        ak.StructuredDataClassifier(
            column_types=invalid_column_types,
            directory=tmp_path,
            seed=test_utils.SEED,
        )

    expected_fragment = 'column_types should be either "categorical"'
    assert expected_fragment in str(exc_info.value)


def test_structured_data_input_name_type_mismatch_error(tmp_path):
    """A typed column missing from column_names must raise a ValueError."""
    with pytest.raises(ValueError) as exc_info:
        # "_age" is typed but not listed among the column names.
        classifier = ak.StructuredDataClassifier(
            column_types={"_age": "numerical", "parch": "categorical"},
            column_names=["age", "fare"],
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        classifier.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")

    error_text = str(exc_info.value)
    assert "column_names and column_types are mismatched." in error_text


def test_structured_data_col_type_no_name_error(tmp_path):
    """Giving column_types without column_names must raise a ValueError."""
    with pytest.raises(ValueError) as exc_info:
        classifier = ak.StructuredDataClassifier(
            column_types={"age": "numerical", "parch": "categorical"},
            directory=tmp_path,
            seed=test_utils.SEED,
        )
        classifier.fit(x=np.random.rand(100, 30), y=np.random.rand(100, 1))

    error_text = str(exc_info.value)
    assert "column_names must be specified" in error_text


@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.evaluate")
def test_structured_clf_evaluate_call_automodel_evaluate(
    evaluate, fit, tmp_path
):
    """StructuredDataClassifier.evaluate must reach AutoModel.evaluate."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.evaluate(x=test_utils.TRAIN_CSV_PATH, y="survived")

    # `is_called` is not part of the Mock API: accessing it auto-creates a
    # child mock that is always truthy, so asserting on it can never fail.
    # `called` is the real flag recording whether the mock was invoked.
    assert evaluate.called


@mock.patch("autokeras.AutoModel.fit")
@mock.patch("autokeras.AutoModel.predict")
def test_structured_clf_predict_csv_call_automodel_predict(
    predict, fit, tmp_path
):
    """StructuredDataClassifier.predict must reach AutoModel.predict."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(x=test_utils.TRAIN_CSV_PATH, y="survived")
    auto_model.predict(x=test_utils.TEST_CSV_PATH)

    # `is_called` is not part of the Mock API: accessing it auto-creates a
    # child mock that is always truthy, so asserting on it can never fail.
    # `called` is the real flag recording whether the mock was invoked.
    assert predict.called


@mock.patch("autokeras.AutoModel.fit")
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    """StructuredDataClassifier.fit must delegate to AutoModel.fit."""
    auto_model = ak.StructuredDataClassifier(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(
        x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=3),
    )

    # `is_called` is not part of the Mock API: accessing it auto-creates a
    # child mock that is always truthy, so asserting on it can never fail.
    # `called` is the real flag recording whether the mock was invoked.
    assert fit.called


@mock.patch("autokeras.AutoModel.fit")
def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path):
    """StructuredDataRegressor.fit must delegate to AutoModel.fit."""
    auto_model = ak.StructuredDataRegressor(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(
        x=pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # `is_called` is not part of the Mock API: accessing it auto-creates a
    # child mock that is always truthy, so asserting on it can never fail.
    # `called` is the real flag recording whether the mock was invoked.
    assert fit.called


================================================
FILE: autokeras/tasks/text.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
from typing import Optional
from typing import Type
from typing import Union

from autokeras import auto_model
from autokeras import blocks
from autokeras import nodes as input_module
from autokeras.engine import tuner
from autokeras.tuners import greedy
from autokeras.tuners import task_specific
from autokeras.utils import types


class SupervisedTextPipeline(auto_model.AutoModel):
    """AutoModel whose single input is a freshly created TextInput node."""

    def __init__(self, outputs, **kwargs):
        text_input = input_module.TextInput()
        super().__init__(inputs=text_input, outputs=outputs, **kwargs)


class TextClassifier(SupervisedTextPipeline):
    """AutoKeras text classification class.

    # Arguments
        num_classes: Int. Defaults to None. If None, it will be inferred from
            the data.
        multi_label: Boolean. Defaults to False.
        loss: A Keras loss function. Defaults to use 'binary_crossentropy' or
            'categorical_crossentropy' based on the number of classes.
        metrics: A list of Keras metrics. Defaults to use 'accuracy'.
        project_name: String. The name of the AutoModel.
            Defaults to 'text_classifier'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. Path of the directory storing the search outputs.
            Defaults to None, which would create a folder with the name of
            the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, a task specific
            tuner is used, which first evaluates the most commonly used
            models for the task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        multi_label: bool = False,
        loss: types.LossType = None,
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "text_classifier",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # Fall back to the tuner dedicated to this task when none is given.
        selected_tuner = (
            task_specific.TextClassifierTuner if tuner is None else tuner
        )
        head = blocks.ClassificationHead(
            num_classes=num_classes,
            multi_label=multi_label,
            loss=loss,
            metrics=metrics,
        )
        super().__init__(
            outputs=head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected based on the performances on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The input data should be a
                one dimensional numpy.ndarray in which each element is a
                string holding a full sentence.
            y: numpy.ndarray. Training data y. It can be
                raw labels, one-hot encoded if more than two classes, or binary
                encoded for binary classification.
            epochs: Int. Number of epochs used to train each model during
                the search. When unspecified, training runs for at most 1000
                epochs and stops once the validation loss has not improved
                for 10 epochs (unless an EarlyStopping callback is part of
                the callbacks argument, in which case that callback
                determines early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. The
                fraction of the training data held out as validation data.
                The model does not train on the held-out part and evaluates
                the loss and any model metrics on it at the end of each
                epoch. The held-out samples are the last ones in the
                provided `x` and `y`, before shuffling. Not supported when
                `x` is a dataset. The best model found would be fit on the
                entire dataset including the validation data.
            validation_data: Data used to evaluate the loss and any model
                metrics at the end of each epoch. The model is not trained
                on it. `validation_data` overrides `validation_split`. Its
                type should be the same as the training data. The best
                model found would be fit on the training dataset without
                the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).

        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records the training loss and
                metrics values at successive epochs, as well as the
                validation loss and validation metrics values (if
                applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )


class TextRegressor(SupervisedTextPipeline):
    """AutoKeras text regression class.

    # Arguments
        output_dim: Int. The number of output dimensions. Defaults to None.
            If None, it will be inferred from the data.
        loss: A Keras loss function. Defaults to use 'mean_squared_error'.
        metrics: A list of Keras metrics. Defaults to use 'mean_squared_error'.
        project_name: String. The name of the AutoModel.
            Defaults to 'text_regressor'.
        max_trials: Int. The maximum number of different Keras Models to try.
            The search may finish before reaching the max_trials. Defaults to
            100.
        directory: String. Path of the directory storing the search outputs.
            Defaults to None, which would create a folder with the name of
            the AutoModel in the current directory.
        objective: String. Name of model metric to minimize
            or maximize, e.g. 'val_accuracy'. Defaults to 'val_loss'.
        tuner: String or subclass of AutoTuner. If string, it should be one of
            'greedy', 'bayesian', 'hyperband' or 'random'. It can also be a
            subclass of AutoTuner. If left unspecified, it uses a task specific
            tuner, which first evaluates the most commonly used models for the
            task before exploring other models.
        overwrite: Boolean. Defaults to `False`. If `False`, reloads an existing
            project of the same name if one is found. Otherwise, overwrites the
            project.
        seed: Int. Random seed.
        max_model_size: Int. Maximum number of scalars in the parameters of a
            model. Models larger than this are rejected.
        **kwargs: Any arguments supported by AutoModel.
    """

    def __init__(
        self,
        output_dim: Optional[int] = None,
        loss: types.LossType = "mean_squared_error",
        metrics: Optional[types.MetricsType] = None,
        project_name: str = "text_regressor",
        max_trials: int = 100,
        directory: Union[str, Path, None] = None,
        objective: str = "val_loss",
        tuner: Union[str, Type[tuner.AutoTuner]] = None,
        overwrite: bool = False,
        seed: Optional[int] = None,
        max_model_size: Optional[int] = None,
        **kwargs
    ):
        # The greedy tuner is the fallback when no tuner is given.
        selected_tuner = greedy.Greedy if tuner is None else tuner
        head = blocks.RegressionHead(
            output_dim=output_dim, loss=loss, metrics=metrics
        )
        super().__init__(
            outputs=head,
            max_trials=max_trials,
            directory=directory,
            project_name=project_name,
            objective=objective,
            tuner=selected_tuner,
            overwrite=overwrite,
            seed=seed,
            max_model_size=max_model_size,
            **kwargs
        )

    def fit(
        self,
        x=None,
        y=None,
        epochs=None,
        callbacks=None,
        validation_split=0.2,
        validation_data=None,
        **kwargs
    ):
        """Search for the best model and hyperparameters for the AutoModel.

        The best model is selected based on the performances on the
        validation data.

        # Arguments
            x: numpy.ndarray. Training data x. The input data should be a
                one dimensional numpy.ndarray in which each element is a
                string holding a full sentence.
            y: numpy.ndarray. Training data y. The targets passed to the
                head have to be np.ndarray. It can be single-column or
                multi-column. The values should all be numerical.
            epochs: Int. Number of epochs used to train each model during
                the search. When unspecified, training runs for at most 1000
                epochs and stops once the validation loss has not improved
                for 10 epochs (unless an EarlyStopping callback is part of
                the callbacks argument, in which case that callback
                determines early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2. The
                fraction of the training data held out as validation data.
                The model does not train on the held-out part and evaluates
                the loss and any model metrics on it at the end of each
                epoch. The held-out samples are the last ones in the
                provided `x` and `y`, before shuffling. Not supported when
                `x` is a dataset. The best model found would be fit on the
                entire dataset including the validation data.
            validation_data: Data used to evaluate the loss and any model
                metrics at the end of each epoch. The model is not trained
                on it. `validation_data` overrides `validation_split`. Its
                type should be the same as the training data. The best
                model found would be fit on the training dataset without
                the validation data.
            **kwargs: Any arguments supported by
                [keras.Model.fit](https://keras.io/api/models/model_training_apis/#fit-method).
        # Returns
            history: A Keras History object corresponding to the best model.
                Its History.history attribute records the training loss and
                metrics values at successive epochs, as well as the
                validation loss and validation metrics values (if
                applicable).
        """
        return super().fit(
            x=x,
            y=y,
            epochs=epochs,
            callbacks=callbacks,
            validation_split=validation_split,
            validation_data=validation_data,
            **kwargs
        )


================================================
FILE: autokeras/tasks/text_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import numpy as np

import autokeras as ak
from autokeras import test_utils


@mock.patch("autokeras.AutoModel.fit")
def test_txt_clf_fit_call_auto_model_fit(fit, tmp_path):
    """`TextClassifier.fit` must delegate to the patched `AutoModel.fit`."""
    auto_model = ak.TextClassifier(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1, 2]))

    # `called` is the real Mock call flag. `is_called` is not part of the Mock
    # API: accessing it auto-creates a child mock that is always truthy, so
    # the assertion could never fail.
    assert fit.called


@mock.patch("autokeras.AutoModel.fit")
def test_txt_reg_fit_call_auto_model_fit(fit, tmp_path):
    """`TextRegressor.fit` must delegate to the patched `AutoModel.fit`."""
    auto_model = ak.TextRegressor(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1.0, 2.0]))

    # `called` is the real Mock call flag. `is_called` is not part of the Mock
    # API: accessing it auto-creates a child mock that is always truthy, so
    # the assertion could never fail.
    assert fit.called


================================================
FILE: autokeras/test_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import os

import keras
import numpy as np

import autokeras as ak

# Fixed seed used by the seeded generators in this module (`generate_data`,
# `generate_one_hot_labels`) and passed as `seed=` by tests.
SEED = 5


# Ordered list of feature column names.
# NOTE(review): presumably the columns of the titanic CSVs referenced below;
# confirm against the files.
COLUMN_NAMES = [
    "sex",
    "age",
    "n_siblings_spouses",
    "parch",
    "fare",
    "class",
    "deck",
    "embark_town",
    "alone",
]
# Maps every name in COLUMN_NAMES to either "categorical" or "numerical".
COLUMN_TYPES = {
    "sex": "categorical",
    "age": "numerical",
    "n_siblings_spouses": "categorical",
    "parch": "categorical",
    "fare": "numerical",
    "class": "categorical",
    "deck": "categorical",
    "embark_town": "categorical",
    "alone": "categorical",
}

# Absolute path of the directory one level above the one containing this file.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# Use pre-split CSVs stored under benchmark/datasets to avoid downloads and
# processing at import time.
TRAIN_CSV_PATH = os.path.join(
    ROOT_DIR, "benchmark", "datasets", "titanic_train.csv"
)
TEST_CSV_PATH = os.path.join(
    ROOT_DIR, "benchmark", "datasets", "titanic_test.csv"
)


def generate_data(num_instances=100, shape=(32, 32, 3)):
    """Draw seeded uniform random samples.

    The global NumPy RNG is re-seeded with `SEED` on every call, so repeated
    calls with the same arguments return the same values.

    # Arguments
        num_instances: Int. Size of the leading (sample) axis.
        shape: Tuple of ints. Shape of one sample.

    # Returns
        A numpy array of shape `(num_instances,) + shape`; float64 output of
        the RNG is cast down to float32.
    """
    np.random.seed(SEED)
    dims = (num_instances,) + shape
    samples = np.random.rand(*dims)
    if samples.dtype != np.float64:
        return samples
    return samples.astype(np.float32)


def generate_one_hot_labels(num_instances=100, num_classes=10):
    """Draw seeded random class ids and one-hot encode them.

    The global NumPy RNG is re-seeded with `SEED` before sampling.

    # Arguments
        num_instances: Int. Number of labels to generate.
        num_classes: Int. Exclusive upper bound of the sampled class ids, also
            passed as `num_classes` to `keras.utils.to_categorical`.

    # Returns
        The result of `keras.utils.to_categorical` on the sampled ids.
    """
    np.random.seed(SEED)
    class_ids = np.random.randint(num_classes, size=num_instances)
    return keras.utils.to_categorical(class_ids, num_classes=num_classes)


def generate_text_data(num_instances=100):
    """Build random four-word sentences from a small fixed vocabulary.

    Every sentence takes one word from each of the four word lists below, in
    list order, joined by single spaces. Words are drawn from the global NumPy
    RNG, which is not re-seeded here.

    # Arguments
        num_instances: Int. Number of sentences to generate.

    # Returns
        A numpy array holding `num_instances` sentences.
    """
    word_lists = np.array(
        [
            ["adorable", "clueless", "dirty", "odd", "stupid"],
            ["puppy", "car", "rabbit", "girl", "monkey"],
            ["runs", "hits", "jumps", "drives", "barfs"],
            [
                "crazily.",
                "dutifully.",
                "foolishly.",
                "merrily.",
                "occasionally.",
            ],
        ]
    )

    sentences = []
    for _ in range(num_instances):
        words = []
        # One draw per word list, in list order, to keep the RNG stream
        # consumption stable.
        for position in range(4):
            words.append(word_lists[position][np.random.randint(0, 5)])
        sentences.append(" ".join(words))
    return np.array(sentences)


def build_graph():
    """Assemble a small image-classification graph for tests.

    Clears the Keras session, then wires a 32x32x3 `ImageInput` through a
    `SpatialReduction` block into a 10-class `ClassificationHead`.

    # Returns
        An `ak.graph.Graph` connecting the input node to the head output.
    """
    keras.backend.clear_session()

    inputs = ak.ImageInput(shape=(32, 32, 3))
    inputs.batch_size = 32
    inputs.num_samples = 1000

    reduced = ak.SpatialReduction()(inputs)
    outputs = ak.ClassificationHead(num_classes=10, shape=(10,))(reduced)
    return ak.graph.Graph(inputs=inputs, outputs=outputs)


def get_func_args(func):
    """Return the parameter names of `func` as a set.

    The names "self", "args" and "kwargs" are excluded from the result.

    # Arguments
        func: Callable accepted by `inspect.signature`.

    # Returns
        Set of strings.
    """
    ignored = {"self", "args", "kwargs"}
    return {
        name
        for name in inspect.signature(func).parameters
        if name not in ignored
    }


def get_object_detection_data():
    """Create a tiny random object-detection style dataset.

    Two 32x32x3 images come from `generate_data`. The first image gets 3
    random boxes (shape (3, 4)) with 3 random class values, the second gets 5
    boxes (shape (5, 4)) with 5 class values.

    # Returns
        Tuple `(images, labels)` where `labels` is an object-dtype numpy array
        built from the two `(boxes, class_values)` pairs.
    """
    images = generate_data(num_instances=2, shape=(32, 32, 3))

    # Draw order (boxes, then classes, per image) is kept fixed so the RNG
    # stream is consumed the same way on every call.
    boxes_first = np.random.rand(3, 4)
    classes_first = np.random.rand(3)
    boxes_second = np.random.rand(5, 4)
    classes_second = np.random.rand(5)

    annotations = [
        (boxes_first, classes_first),
        (boxes_second, classes_second),
    ]
    return images, np.array(annotations, dtype=object)


def generate_data_with_categorical(
    num_instances=100,
    num_numerical=10,
    num_categorical=3,
    num_classes=5,
):
    """Generate a random table with numerical and categorical columns.

    The numerical columns come first, followed by the categorical columns
    holding integer codes in `[0, num_classes)`. The global NumPy RNG is used
    without re-seeding.

    # Arguments
        num_instances: Int. Number of rows.
        num_numerical: Int. Number of uniform-random columns.
        num_categorical: Int. Number of integer-coded columns.
        num_classes: Int. Exclusive upper bound of the integer codes.

    # Returns
        A numpy array of shape
        `(num_instances, num_numerical + num_categorical)`; a float64 result
        is cast down to float32.
    """
    # The categorical block is drawn before the numerical one; keep this
    # order so the RNG stream is consumed consistently.
    category_codes = np.random.randint(
        num_classes, size=(num_instances, num_categorical)
    )
    numeric_values = np.random.rand(num_instances, num_numerical)
    combined = np.concatenate((numeric_values, category_codes), axis=1)
    if combined.dtype != np.float64:
        return combined
    return combined.astype(np.float32)


================================================
FILE: autokeras/tuners/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.tuners.bayesian_optimization import BayesianOptimization
from autokeras.tuners.greedy import Greedy
from autokeras.tuners.hyperband import Hyperband
from autokeras.tuners.random_search import RandomSearch
from autokeras.tuners.task_specific import ImageClassifierTuner


================================================
FILE: autokeras/tuners/bayesian_optimization.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras_tuner

from autokeras.engine import tuner as tuner_module


class BayesianOptimization(
    keras_tuner.BayesianOptimization, tuner_module.AutoTuner
):
    """KerasTuner BayesianOptimization with preprocessing layer tuning.

    The class only combines its two base classes; it overrides nothing.
    """


================================================
FILE: autokeras/tuners/greedy.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

import keras_tuner
import numpy as np

from autokeras.engine import tuner as tuner_module


class TrieNode:
    """A single node of the hyperparameter-name trie.

    # Attributes
        num_leaves: Int counter, starts at 0.
        children: Dict of child nodes, starts empty.
        hp_name: Name stored on the node, starts as None.
    """

    def __init__(self):
        super().__init__()
        self.hp_name = None
        self.children = {}
        self.num_leaves = 0

    def is_leaf(self):
        """Return True when the node has no children."""
        return not self.children


class Trie:
    """Prefix tree over "/"-separated hyperparameter names."""

    def __init__(self):
        super().__init__()
        self.root = TrieNode()

    def insert(self, hp_name):
        """Insert `hp_name`, creating a node for every unseen path segment.

        The last node on the path stores the full `hp_name`. When at least one
        node had to be created, `num_leaves` is incremented on every node of
        the path, root included.
        """
        path = [self.root]
        created = False
        for segment in hp_name.split("/"):
            children = path[-1].children
            if segment not in children:
                children[segment] = TrieNode()
                created = True
            path.append(children[segment])
        path[-1].hp_name = hp_name

        if not created:
            return
        for visited in path:
            visited.num_leaves += 1

    @property
    def nodes(self):
        """All nodes of the trie in depth-first pre-order, root first."""
        return self._get_all_nodes(self.root)

    def _get_all_nodes(self, node):
        """Collect `node` followed by all of its descendants."""
        collected = [node]
        for child in node.children.values():
            collected.extend(self._get_all_nodes(child))
        return collected

    def get_hp_names(self, node):
        """Return the `hp_name` of every leaf at or below `node`."""
        if node.is_leaf():
            return [node.hp_name]
        names = []
        for child in node.children.values():
            names.extend(self.get_hp_names(child))
        return names


class GreedyOracle(keras_tuner.Oracle):
    """An oracle combining random search and greedy algorithm.

    It groups the HyperParameters into several categories, namely, HyperGraph,
    Preprocessor, Architecture, and Optimization. The oracle tunes each group
    separately using random search. In each trial, it uses a greedy strategy to
    generate new values for one of the categories of HyperParameters and uses
    the best trial so far for the rest of the HyperParameters values.

    In this implementation a group is chosen by picking a node of a trie built
    from the "/"-separated names of the active, non-fixed hyperparameters; all
    hyperparameters below the picked node are re-sampled.

    # Arguments
        initial_hps: A list of dictionaries in the form of
            {HyperParameter name (String): HyperParameter value}.
            Each dictionary is one set of HyperParameters, which are used as the
            initial trials for the search. Defaults to None.
        seed: Int. Random seed.
    """

    def __init__(self, initial_hps=None, seed=None, **kwargs):
        super().__init__(seed=seed, **kwargs)
        # Deep copy so later changes to the caller's list do not leak in;
        # None (or an empty list) becomes [].
        self.initial_hps = copy.deepcopy(initial_hps) or []
        # One flag per initial hp set: True once it has been handed out.
        self._tried_initial_hps = [False] * len(self.initial_hps)

    def get_state(self):
        """Return the parent state extended with the initial-hps bookkeeping."""
        state = super().get_state()
        state.update(
            {
                "initial_hps": self.initial_hps,
                "tried_initial_hps": self._tried_initial_hps,
            }
        )
        return state

    def set_state(self, state):
        """Restore the parent state and the initial-hps bookkeeping."""
        super().set_state(state)
        self.initial_hps = state["initial_hps"]
        self._tried_initial_hps = state["tried_initial_hps"]

    def _select_hps(self):
        """Pick the names of the hyperparameters to re-sample in this trial.

        # Returns
            List of hyperparameter names; empty when the trie holds only its
            root (no active, non-fixed hyperparameter).
        """
        trie = Trie()
        best_hps = self._get_best_hps()
        for hp in best_hps.space:
            # Not picking the fixed hps for generating new values.
            if best_hps.is_active(hp) and not isinstance(
                hp, keras_tuner.engine.hyperparameters.Fixed
            ):
                trie.insert(hp.name)
        all_nodes = trie.nodes

        # Only the root is present: nothing to tune.
        if len(all_nodes) <= 1:
            return []

        # Each node is weighted by 1 / num_leaves, then weights are normalized.
        probabilities = np.array([1 / node.num_leaves for node in all_nodes])
        sum_p = np.sum(probabilities)
        probabilities = probabilities / sum_p
        # NOTE(review): this draws from the global NumPy RNG, not from
        # `self._seed_state`, so the choice does not appear to be controlled
        # by the `seed` argument - confirm whether that is intended.
        node = np.random.choice(all_nodes, p=probabilities)

        return trie.get_hp_names(node)

    def _next_initial_hps(self):
        """Return the first untried initial hp set and mark it as tried.

        Falls through (returning None) when every set has been tried.
        """
        for index, hps in enumerate(self.initial_hps):
            if not self._tried_initial_hps[index]:
                self._tried_initial_hps[index] = True
                return hps

    def populate_space(self, trial_id):
        """Produce the hyperparameter values for a new trial.

        Untried initial hp sets are handed out first. Afterwards a group of
        hyperparameters is selected and re-sampled, retrying up to
        `self._max_collisions` times.

        # Arguments
            trial_id: Not used by this method.

        # Returns
            Dict with keys "status" (a `TrialStatus`) and "values" (dict of
            hyperparameter values, or None when the status is STOPPED).
        """
        if not all(self._tried_initial_hps):
            values = self._next_initial_hps()
            return {
                "status": keras_tuner.engine.trial.TrialStatus.RUNNING,
                "values": values,
            }

        for _ in range(self._max_collisions):
            hp_names = self._select_hps()
            values = self._generate_hp_values(hp_names)
            # Reached max collisions.
            if values is None:
                continue
            # Values found.
            return {
                "status": keras_tuner.engine.trial.TrialStatus.RUNNING,
                "values": values,
            }
        # All stages reached max collisions.
        return {
            "status": keras_tuner.engine.trial.TrialStatus.STOPPED,
            "values": None,
        }

    def _get_best_hps(self):
        """Return a copy of the best trial's hyperparameters.

        Falls back to a copy of `self.hyperparameters` when
        `get_best_trials()` returns nothing.
        """
        best_trials = self.get_best_trials()
        if best_trials:
            return best_trials[0].hyperparameters.copy()
        else:
            return self.hyperparameters.copy()

    def _generate_hp_values(self, hp_names):
        """Build a value set that re-samples only the selected hyperparameters.

        # Arguments
            hp_names: Names of the hyperparameters to re-sample. Other
                hyperparameters that are active in the best hps keep their
                best value.

        # Returns
            Dict of values whose hash has not been tried before, or None when
            more than `self._max_collisions` candidates collided.
        """
        best_hps = self._get_best_hps()

        collisions = 0
        while True:
            hps = keras_tuner.HyperParameters()
            # Generate a set of random values.
            for hp in self.hyperparameters.space:
                hps.merge([hp])
                # if not active, do nothing.
                # if active, check if selected to be changed.
                if hps.is_active(hp):
                    # if was active and not selected, do nothing.
                    if best_hps.is_active(hp.name) and hp.name not in hp_names:
                        hps.values[hp.name] = best_hps.values[hp.name]
                        continue
                    # if was not active or selected, sample.
                    hps.values[hp.name] = hp.random_sample(self._seed_state)
                    # Advance the seed so the next sample uses a new one.
                    self._seed_state += 1
            values = hps.values
            # Keep trying until the set of values is unique,
            # or until we exit due to too many collisions.
            values_hash = self._compute_values_hash(values)
            if values_hash in self._tried_so_far:
                collisions += 1
                if collisions <= self._max_collisions:
                    continue
                return None
            self._tried_so_far.add(values_hash)
            break
        return values


class Greedy(tuner_module.AutoTuner):
    """AutoTuner driven by a `GreedyOracle`.

    # Arguments
        hypermodel: The `keras_tuner.HyperModel` passed to the parent tuner.
        objective: String. Forwarded to the oracle. Defaults to "val_loss".
        max_trials: Int. Forwarded to the oracle. Defaults to 10.
        initial_hps: Optional list of {hyperparameter name: value} dicts
            forwarded to the oracle. Defaults to None.
        seed: Optional int. Stored on the tuner and forwarded to the oracle.
        hyperparameters: Optional `keras_tuner.HyperParameters` forwarded to
            the oracle.
        tune_new_entries: Boolean. Forwarded to the oracle. Defaults to True.
        allow_new_entries: Boolean. Forwarded to the oracle. Defaults to True.
        **kwargs: Forwarded to the parent tuner constructor.
    """

    def __init__(
        self,
        hypermodel: keras_tuner.HyperModel,
        objective: str = "val_loss",
        max_trials: int = 10,
        initial_hps: Optional[List[Dict[str, Any]]] = None,
        seed: Optional[int] = None,
        hyperparameters: Optional[keras_tuner.HyperParameters] = None,
        tune_new_entries: bool = True,
        allow_new_entries: bool = True,
        **kwargs
    ):
        self.seed = seed
        oracle_config = dict(
            objective=objective,
            max_trials=max_trials,
            initial_hps=initial_hps,
            seed=seed,
            hyperparameters=hyperparameters,
            tune_new_entries=tune_new_entries,
            allow_new_entries=allow_new_entries,
        )
        super().__init__(
            oracle=GreedyOracle(**oracle_config),
            hypermodel=hypermodel,
            **kwargs
        )


================================================
FILE: autokeras/tuners/greedy_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import keras
import keras_tuner

import autokeras as ak
from autokeras import test_utils
from autokeras.tuners import greedy
from autokeras.tuners import task_specific


def test_greedy_oracle_get_state_update_space_can_run():
    """Smoke test: a state round-trip followed by update_space runs."""
    oracle = greedy.GreedyOracle(objective="val_loss")
    saved_state = oracle.get_state()
    oracle.set_state(saved_state)

    space = keras_tuner.HyperParameters()
    space.Boolean("test")
    oracle.update_space(space)


@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_different_values(get_best_trials):
    """Two consecutive populate_space calls must differ in some value."""
    space = keras_tuner.HyperParameters()
    test_utils.build_graph().build(space)

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    best_trial = mock.Mock()
    best_trial.hyperparameters = space
    get_best_trials.return_value = [best_trial]

    oracle.update_space(space)
    values_a = oracle.populate_space("a")["values"]
    values_b = oracle.populate_space("b")["values"]

    differences = [values_a[key] != values_b[key] for key in values_a]
    assert any(differences)


@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_doesnt_crash_with_init_hps(get_best_trials):
    """Ten populate/build/update rounds with image presets must not raise."""
    space = keras_tuner.HyperParameters()
    keras.backend.clear_session()

    image_input = ak.ImageInput(shape=(32, 32, 3))
    image_input.batch_size = 32
    image_input.num_samples = 1000
    block_output = ak.ImageBlock()(image_input)
    head = ak.ClassificationHead(num_classes=10)
    head.shape = (10,)
    head_output = head(block_output)
    graph = ak.graph.Graph(inputs=image_input, outputs=head_output)
    graph.build(space)

    oracle = greedy.GreedyOracle(
        initial_hps=task_specific.IMAGE_CLASSIFIER,
        objective="val_loss",
        seed=test_utils.SEED,
    )
    best_trial = mock.Mock()
    best_trial.hyperparameters = space
    get_best_trials.return_value = [best_trial]

    for _ in range(10):
        keras.backend.clear_session()
        trial_values = oracle.populate_space("a")["values"]
        trial_hp = oracle.hyperparameters.copy()
        trial_hp.values = trial_values
        graph.build(trial_hp)
        oracle.update_space(trial_hp)


@mock.patch("autokeras.tuners.greedy.GreedyOracle._compute_values_hash")
@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_stop_reach_max_collision(
    get_best_trials, compute_values_hash
):
    """With a constant values hash, the second trial must be STOPPED."""
    space = keras_tuner.HyperParameters()
    test_utils.build_graph().build(space)

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    best_trial = mock.Mock()
    best_trial.hyperparameters = space
    get_best_trials.return_value = [best_trial]
    # Every candidate hashes to the same value, forcing collisions.
    compute_values_hash.return_value = 1

    oracle.update_space(space)
    _ = oracle.populate_space("a")["values"]
    second_status = oracle.populate_space("b")["status"]
    assert second_status == keras_tuner.engine.trial.TrialStatus.STOPPED


@mock.patch("autokeras.tuners.greedy.GreedyOracle.get_best_trials")
def test_greedy_oracle_populate_space_with_no_hp(get_best_trials):
    """An empty search space must yield an empty values dict."""
    empty_space = keras_tuner.HyperParameters()

    oracle = greedy.GreedyOracle(objective="val_loss", seed=test_utils.SEED)
    best_trial = mock.Mock()
    best_trial.hyperparameters = empty_space
    get_best_trials.return_value = [best_trial]

    oracle.update_space(empty_space)
    populated = oracle.populate_space("a")["values"]

    assert len(populated) == 0


================================================
FILE: autokeras/tuners/hyperband.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras_tuner

from autokeras.engine import tuner as tuner_module


class Hyperband(keras_tuner.Hyperband, tuner_module.AutoTuner):
    """KerasTuner Hyperband with preprocessing layer tuning.

    # Arguments
        max_epochs: Int. Forwarded to the parent constructor.
            Defaults to 1000.
        max_trials: Int. Assigned to `self.oracle.max_trials` once the parent
            constructor has run. Defaults to 100.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """

    def __init__(
        self, max_epochs: int = 1000, max_trials: int = 100, *args, **kwargs
    ):
        super().__init__(*args, max_epochs=max_epochs, **kwargs)
        # The trial budget is set on the oracle after construction.
        self.oracle.max_trials = max_trials


================================================
FILE: autokeras/tuners/hyperband_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/tuners/random_search.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras_tuner

from autokeras.engine import tuner as tuner_module


class RandomSearch(keras_tuner.RandomSearch, tuner_module.AutoTuner):
    """KerasTuner RandomSearch with preprocessing layer tuning.

    The class only combines its two base classes; it overrides nothing.
    """


================================================
FILE: autokeras/tuners/random_search_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/tuners/task_specific.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from autokeras.tuners import greedy

# Initial hyperparameter sets for image classification, passed as
# `initial_hps` by `ImageClassifierTuner`. Each dict maps a hyperparameter
# name to its value; the three entries use the "vanilla", "resnet" and
# "efficient" block types respectively.
IMAGE_CLASSIFIER = [
    {
        "image_block_1/block_type": "vanilla",
        "image_block_1/normalize": True,
        "image_block_1/augment": False,
        "image_block_1/conv_block_1/kernel_size": 3,
        "image_block_1/conv_block_1/num_blocks": 1,
        "image_block_1/conv_block_1/num_layers": 2,
        "image_block_1/conv_block_1/max_pooling": True,
        "image_block_1/conv_block_1/separable": False,
        "image_block_1/conv_block_1/dropout": 0.25,
        "image_block_1/conv_block_1/filters_0_0": 32,
        "image_block_1/conv_block_1/filters_0_1": 64,
        "classification_head_1/spatial_reduction_1/reduction_type": "flatten",
        "classification_head_1/dropout": 0.5,
        "optimizer": "adam",
        "learning_rate": 1e-3,
    },
    {
        "image_block_1/block_type": "resnet",
        "image_block_1/normalize": True,
        "image_block_1/augment": True,
        "image_block_1/image_augmentation_1/horizontal_flip": True,
        "image_block_1/image_augmentation_1/vertical_flip": True,
        "image_block_1/image_augmentation_1/contrast_factor": 0.0,
        "image_block_1/image_augmentation_1/rotation_factor": 0.0,
        "image_block_1/image_augmentation_1/translation_factor": 0.1,
        "image_block_1/image_augmentation_1/zoom_factor": 0.0,
        "image_block_1/res_net_block_1/pretrained": False,
        "image_block_1/res_net_block_1/version": "resnet50",
        "image_block_1/res_net_block_1/imagenet_size": True,
        "classification_head_1/spatial_reduction_1/reduction_type": "global_avg",  # noqa: E501
        "classification_head_1/dropout": 0,
        "optimizer": "adam",
        "learning_rate": 1e-3,
    },
    {
        "image_block_1/block_type": "efficient",
        "image_block_1/normalize": True,
        "image_block_1/augment": True,
        "image_block_1/image_augmentation_1/horizontal_flip": True,
        "image_block_1/image_augmentation_1/vertical_flip": False,
        "image_block_1/image_augmentation_1/contrast_factor": 0.0,
        "image_block_1/image_augmentation_1/rotation_factor": 0.0,
        "image_block_1/image_augmentation_1/translation_factor": 0.1,
        "image_block_1/image_augmentation_1/zoom_factor": 0.0,
        "image_block_1/efficient_net_block_1/pretrained": True,
        "image_block_1/efficient_net_block_1/version": "b7",
        "image_block_1/efficient_net_block_1/trainable": True,
        "image_block_1/efficient_net_block_1/imagenet_size": True,
        "classification_head_1/spatial_reduction_1/reduction_type": "global_avg",  # noqa: E501
        "classification_head_1/dropout": 0,
        "optimizer": "adam",
        "learning_rate": 2e-5,
    },
]

# Initial hyperparameter set for text classification, passed as `initial_hps`
# by `TextClassifierTuner`. The single dict maps hyperparameter names to
# values.
TEXT_CLASSIFIER = [
    {
        "text_block_1/max_tokens": 500,
        "text_block_1/embedding_1/embedding_dim": 32,
        "text_block_1/embedding_1/dropout": 0.25,
        "text_block_1/conv_block_1/kernel_size": 3,
        "text_block_1/conv_block_1/separable": False,
        "text_block_1/conv_block_1/max_pooling": True,
        "text_block_1/conv_block_1/num_blocks": 2,
        "text_block_1/conv_block_1/num_layers": 2,
        "text_block_1/conv_block_1/filters_0_0": 32,
        "text_block_1/conv_block_1/filters_0_1": 32,
        "text_block_1/conv_block_1/dropout": 0.0,
        "text_block_1/conv_block_1/filters_1_0": 32,
        "text_block_1/conv_block_1/filters_1_1": 32,
        "text_block_1/spatial_reduction_1/reduction_type": "flatten",
        "text_block_1/dense_block_1/use_batchnorm": False,
        "text_block_1/dense_block_1/num_layers": 2,
        "text_block_1/dense_block_1/units_0": 32,
        "text_block_1/dense_block_1/dropout": 0.0,
        "text_block_1/dense_block_1/units_1": 32,
        "classification_head_1/dropout": 0,
        "optimizer": "adam_weight_decay",
        "learning_rate": 2e-5,
    },
]

# Initial hyperparameter set for structured-data classification, passed as
# `initial_hps` by `StructuredDataClassifierTuner`.
STRUCTURED_DATA_CLASSIFIER = [
    {
        "structured_data_block_1/normalize": True,
        "structured_data_block_1/dense_block_1/num_layers": 2,
        "structured_data_block_1/dense_block_1/use_batchnorm": False,
        "structured_data_block_1/dense_block_1/dropout": 0,
        "structured_data_block_1/dense_block_1/units_0": 32,
        "structured_data_block_1/dense_block_1/units_1": 32,
        "classification_head_1/dropout": 0.0,
        "optimizer": "adam",
        "learning_rate": 0.001,
    }
]

# Initial hyperparameter set for structured-data regression, passed as
# `initial_hps` by `StructuredDataRegressorTuner`. It differs from
# STRUCTURED_DATA_CLASSIFIER only in using the "regression_head_1/dropout"
# key instead of "classification_head_1/dropout".
STRUCTURED_DATA_REGRESSOR = [
    {
        "structured_data_block_1/normalize": True,
        "structured_data_block_1/dense_block_1/num_layers": 2,
        "structured_data_block_1/dense_block_1/use_batchnorm": False,
        "structured_data_block_1/dense_block_1/dropout": 0,
        "structured_data_block_1/dense_block_1/units_0": 32,
        "structured_data_block_1/dense_block_1/units_1": 32,
        "regression_head_1/dropout": 0.0,
        "optimizer": "adam",
        "learning_rate": 0.001,
    }
]


class ImageClassifierTuner(greedy.Greedy):
    """Greedy tuner preloaded with the `IMAGE_CLASSIFIER` initial hps.

    # Arguments
        **kwargs: Forwarded to `greedy.Greedy`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs, initial_hps=IMAGE_CLASSIFIER)


class TextClassifierTuner(greedy.Greedy):
    """Greedy tuner preloaded with the `TEXT_CLASSIFIER` initial hps.

    # Arguments
        **kwargs: Forwarded to `greedy.Greedy`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs, initial_hps=TEXT_CLASSIFIER)


class StructuredDataClassifierTuner(greedy.Greedy):
    """Greedy tuner preloaded with `STRUCTURED_DATA_CLASSIFIER` initial hps.

    # Arguments
        **kwargs: Forwarded to `greedy.Greedy`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs, initial_hps=STRUCTURED_DATA_CLASSIFIER)


class StructuredDataRegressorTuner(greedy.Greedy):
    """Greedy tuner preloaded with `STRUCTURED_DATA_REGRESSOR` initial hps.

    # Arguments
        **kwargs: Forwarded to `greedy.Greedy`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs, initial_hps=STRUCTURED_DATA_REGRESSOR)


================================================
FILE: autokeras/tuners/task_specific_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy

import keras_tuner

import autokeras as ak
from autokeras.tuners import task_specific


def test_img_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """IMAGE_CLASSIFIER[0] names exactly the hps registered by the build."""
    classifier = ak.ImageClassifier(directory=tmp_path)
    classifier.inputs[0].shape = (32, 32, 3)
    classifier.outputs[0].in_blocks[0].shape = (10,)

    preset = task_specific.IMAGE_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(preset)
    classifier.tuner.hypermodel.build(hp)

    expected_names = set(preset.keys())
    registered_names = set(hp._hps.keys())
    assert expected_names == registered_names


def test_img_clf_init_hp1_equals_hp_of_a_model(tmp_path):
    """IMAGE_CLASSIFIER[1] names exactly the hps registered by the build."""
    classifier = ak.ImageClassifier(directory=tmp_path)
    classifier.inputs[0].shape = (32, 32, 3)
    classifier.outputs[0].in_blocks[0].shape = (10,)

    preset = task_specific.IMAGE_CLASSIFIER[1]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(preset)
    classifier.tuner.hypermodel.build(hp)

    expected_names = set(preset.keys())
    registered_names = set(hp._hps.keys())
    assert expected_names == registered_names


def test_img_clf_init_hp2_equals_hp_of_a_model(tmp_path):
    """IMAGE_CLASSIFIER[2] names exactly the hps registered by the build."""
    classifier = ak.ImageClassifier(directory=tmp_path)
    classifier.inputs[0].shape = (32, 32, 3)
    classifier.outputs[0].in_blocks[0].shape = (10,)

    preset = task_specific.IMAGE_CLASSIFIER[2]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(preset)
    classifier.tuner.hypermodel.build(hp)

    expected_names = set(preset.keys())
    registered_names = set(hp._hps.keys())
    assert expected_names == registered_names


def test_txt_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """TEXT_CLASSIFIER[0] names exactly the hps registered by the build."""
    classifier = ak.TextClassifier(directory=tmp_path)
    text_input = classifier.inputs[0]
    text_input.shape = (1,)
    text_input.batch_size = 6
    text_input.num_samples = 1000
    classifier.outputs[0].in_blocks[0].shape = (10,)
    classifier.tuner.hypermodel.epochs = 1000
    classifier.tuner.hypermodel.num_samples = 20000

    preset = task_specific.TEXT_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(preset)
    classifier.tuner.hypermodel.build(hp)

    expected_names = set(preset.keys())
    registered_names = set(hp._hps.keys())
    assert expected_names == registered_names


def test_sd_clf_init_hp0_equals_hp_of_a_model(tmp_path):
    """STRUCTURED_DATA_CLASSIFIER[0] names exactly the hps of the build."""
    classifier = ak.StructuredDataClassifier(
        directory=tmp_path,
        column_names=["a", "b"],
        column_types={"a": "numerical", "b": "numerical"},
    )
    classifier.inputs[0].shape = (2,)
    classifier.outputs[0].in_blocks[0].shape = (10,)

    preset = task_specific.STRUCTURED_DATA_CLASSIFIER[0]
    hp = keras_tuner.HyperParameters()
    hp.values = copy.copy(preset)
    classifier.tuner.hypermodel.build(hp)

    expected_names = set(preset.keys())
    registered_names = set(hp._hps.keys())
    assert expected_names == registered_names


def test_sd_reg_init_hp0_equals_hp_of_a_model(tmp_path):
    """STRUCTURED_DATA_REGRESSOR[0] matches the hp names of a built model."""
    columns = {"a": "numerical", "b": "numerical"}
    regressor = ak.StructuredDataRegressor(
        directory=tmp_path,
        column_names=["a", "b"],
        column_types=columns,
    )
    regressor.inputs[0].shape = (2,)
    regressor.outputs[0].in_blocks[0].shape = (10,)
    preset = task_specific.STRUCTURED_DATA_REGRESSOR[0]
    hps = keras_tuner.HyperParameters()
    hps.values = copy.copy(preset)

    regressor.tuner.hypermodel.build(hps)

    preset_names = set(preset.keys())
    built_names = set(hps._hps.keys())
    assert preset_names == built_names


================================================
FILE: autokeras/utils/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: autokeras/utils/data_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import numpy as np
import tree
from keras import ops


def split_dataset(dataset, validation_split):
    """Cut a nested structure of numpy arrays into train and validation parts.

    The first axis of the first array (in flattened order) gives the number
    of instances. The validation part is the tail of every array; its size is
    clamped so that both parts hold at least one instance.

    # Arguments
        dataset: A nested structure of numpy arrays.
        validation_split: Float. The split ratio for the validation set.

    # Raises
        ValueError: If the dataset provided is too small to be split.

    # Returns
        A tuple of two nested structures.
        The training set and the validation set.
    """

    def _shape_of(array):
        return np.array(array.shape)

    # Only the first array's leading dimension is used as the instance count.
    all_shapes = tree.flatten(tree.map_structure(_shape_of, dataset))
    num_instances = all_shapes[0][0]
    if num_instances < 2:
        raise ValueError(
            "The dataset should at least contain 2 instances to be split."
        )

    # Clamp the validation size into [1, num_instances - 1].
    requested_size = int(num_instances * validation_split)
    validation_set_size = min(max(requested_size, 1), num_instances - 1)
    split_at = num_instances - validation_set_size

    def _head(array):
        return array[:split_at]

    def _tail(array):
        return array[split_at:]

    return (
        tree.map_structure(_head, dataset),
        tree.map_structure(_tail, dataset),
    )


def cast_to_float32(tensor):
    """Return `tensor` as float32, casting only when its dtype differs."""
    current_dtype = keras.backend.standardize_dtype(tensor.dtype)
    if current_dtype != "float32":
        return ops.cast(tensor, "float32")  # pragma: no cover
    return tensor


def dataset_shape(dataset):
    """Map every array in a nested structure to its shape.

    # Arguments
        dataset: A nested structure (dict, list, tuple) containing numpy
            arrays.

    # Returns
        The same nested structure where each array is replaced by a numpy
        array holding its shape.
    """

    def _shape_of(array):
        return np.array(array.shape)

    return tree.map_structure(_shape_of, dataset)


================================================
FILE: autokeras/utils/data_utils_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from autokeras.utils import data_utils


def test_split_data_has_one_batch_error():
    """Splitting a one-instance dataset raises a descriptive ValueError."""
    single_instance = np.array([1])
    with pytest.raises(ValueError) as excinfo:
        data_utils.split_dataset(single_instance, 0.2)
    message = str(excinfo.value)
    assert "The dataset should at least contain 2" in message


================================================
FILE: autokeras/utils/io_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json

from keras_tuner.engine import hyperparameters

WHITELIST_FORMATS = (".bmp", ".gif", ".jpeg", ".jpg", ".png")


def save_json(path, obj):
    """Serialize `obj` as JSON into the file at `path`.

    # Arguments
        path: Path of the file to write; it is opened in write mode, so any
            existing content is replaced.
        obj: The JSON-serializable object to store.
    """
    with open(path, mode="w") as json_file:
        json.dump(obj, fp=json_file)


def load_json(path):
    """Read the JSON file at `path` and return the decoded object.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so JSON files containing non-ASCII text load the same
    way on every platform.

    # Arguments
        path: Path of the JSON file to read.

    # Returns
        The Python object decoded from the file's JSON content.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def deserialize_block_arg(arg):
    """Deserialize `arg` when it is a dict; return anything else unchanged.

    A dict is handed to `hyperparameters.deserialize`; every other value is
    passed through as is.
    """
    return hyperparameters.deserialize(arg) if isinstance(arg, dict) else arg


def serialize_block_arg(arg):
    """Serialize `arg` when it is a HyperParameter; otherwise return it as is.

    A `hyperparameters.HyperParameter` instance is handed to
    `hyperparameters.serialize`; every other value is passed through.
    """
    is_hyperparameter = isinstance(arg, hyperparameters.HyperParameter)
    return hyperparameters.serialize(arg) if is_hyperparameter else arg


================================================
FILE: autokeras/utils/layer_utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from keras import layers


def get_global_average_pooling(shape):
    """Pick the global average pooling layer class for `shape`.

    The class is selected by `len(shape) - 3`: a length-3 shape gives the 1D
    layer, length 4 the 2D layer and length 5 the 3D layer.
    """
    candidates = (
        layers.GlobalAveragePooling1D,
        layers.GlobalAveragePooling2D,
        layers.GlobalAveragePooling3D,
    )
    return candidates[len(shape) - 3]


def get_global_max_pooling(shape):
    """Pick the global max pooling layer class for `shape`.

    The class is selected by `len(shape) - 3`: a length-3 shape gives the 1D
    layer, length 4 the 2D layer and length 5 the 3D layer.
    """
    candidates = (
        layers.GlobalMaxPool1D,
        layers.GlobalMaxPool2D,
        layers.GlobalMaxPool3D,
    )
    return candidates[len(shape) - 3]


def get_max_pooling(shape):
    """Pick the max pooling layer class for `shape`.

    The class is selected by `len(shape) - 3`: a length-3 shape gives the 1D
    layer, length 4 the 2D layer and length 5 the 3D layer.
    """
    candidates = (
        layers.MaxPool1D,
        layers.MaxPool2D,
        layers.MaxPool3D,
    )
    return candidates[len(shape) - 3]


def get_conv(shape):
    """Pick the convolution layer class for `shape`.

    The class is selected by `len(shape) - 3`: a length-3 shape gives
    `Conv1D`, length 4 `Conv2D` and length 5 `Conv3D`.
    """
    candidates = (layers.Conv1D, layers.Conv2D, layers.Conv3D)
    return candidates[len(shape) - 3]


def get_sep_conv(shape):  # pragma: no cover
    """Pick the separable convolution layer class for `shape`.

    The class is selected by `len(shape) - 3`. The 1D and 2D entries are
    separable convolutions; the 3D entry is the regular `Conv3D`.
    """
    # NOTE(review): presumably Conv3D stands in because no separable 3D layer
    # is available - confirm.
    candidates = (
        layers.SeparableConv1D,
        layers.SeparableConv2D,
        layers.Conv3D,
    )
    return candidates[len(shape) - 3]


================================================
FILE: autokeras/utils/types.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable
from typing import Dict
from typing import List
from typing import Union

import numpy as np
from keras.losses import Loss
from keras.metrics import Metric

# Type aliases used for annotations.

# A dataset is a numpy array; the single-member Union currently reduces to
# `np.ndarray` itself.
DatasetType = Union[np.ndarray]
# A loss may be given by name, as a callable, or as a Keras `Loss` instance.
LossType = Union[str, Callable, Loss]
# A single metric may be given by name, as a callable, or as a Keras `Metric`.
AcceptableMetric = Union[str, Callable, Metric]
# Metrics may be a flat list, a list of lists, or a dict keyed by string.
MetricsType = Union[
    List[AcceptableMetric],
    List[List[AcceptableMetric]],
    Dict[str, AcceptableMetric],
]


================================================
FILE: autokeras/utils/utils.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import keras
import keras_tuner
import tree

# Collect OOM exceptions from available libraries
oom_exceptions = []
try:
    import tensorflow as tf

    oom_exceptions.append(tf.errors.ResourceExhaustedError)  # pragma: no cover
except ImportError:  # pragma: no cover
    pass  # pragma: no cover

try:
    import torch

    oom_exceptions.append(torch.cuda.OutOfMemoryError)  # pragma: no cover
except ImportError:  # pragma: no cover
    pass  # pragma: no cover

try:
    import jax

    oom_exceptions.append(jax.errors.ResourceExhaustedError)  # pragma: no cover
except (ImportError, AttributeError):  # pragma: no cover
    pass  # pragma: no cover

oom_exceptions = tuple(oom_exceptions)


def validate_num_inputs(inputs, num):
    """Check that `inputs` flattens to exactly `num` elements.

    # Arguments
        inputs: A nested structure; it is flattened before being counted.
        num: The expected number of flattened elements.

    # Raises
        ValueError: If the flattened length differs from `num`.
    """
    received = len(tree.flatten(inputs))
    if received == num:
        return
    raise ValueError(
        "Expected {num} elements in the inputs list "
        "but received {len} inputs.".format(num=num, len=received)
    )


def to_snake_case(name):
    """Convert a CamelCase name into lower-case snake_case.

    # Arguments
        name: String. The name to convert.

    # Returns
        The converted, lower-cased string.
    """
    # Insert "_" before an uppercase letter that starts a capitalized word.
    with_word_breaks = re.sub("(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
    # Insert "_" between a lowercase letter and the uppercase letter after it.
    fully_split = re.sub("([a-z])([A-Z])", r"\1_\2", with_word_breaks)
    return fully_split.lower()


def contain_instance(instance_list, instance_type):
    """Tell whether any element of `instance_list` is an `instance_type`.

    # Arguments
        instance_list: An iterable of objects to inspect.
        instance_type: A type (or tuple of types) accepted by `isinstance`.

    # Returns
        Boolean. True if at least one element matches, False otherwise.
    """
    # Every element is checked, as in a full list scan.
    matches = [
        candidate
        for candidate in instance_list
        if isinstance(candidate, instance_type)
    ]
    return len(matches) > 0


def evaluate_with_adaptive_batch_size(
    model, batch_size, verbose=1, **fit_kwargs
):
    """Call `model.evaluate` through `run_with_adaptive_batch_size`.

    # Arguments
        model: The object whose `evaluate` method is called.
        batch_size: The batch size handed to `run_with_adaptive_batch_size`.
        verbose: Forwarded to `model.evaluate`. Defaults to 1.
        **fit_kwargs: Forwarded to `run_with_adaptive_batch_size`.

    # Returns
        Whatever `run_with_adaptive_batch_size` returns.
    """

    def _evaluate(x, validation_data, **kwargs):
        # `validation_data` is accepted but not forwarded to `evaluate`.
        return model.evaluate(x, verbose=verbose, **kwargs)

    return run_with_adaptive_batch_size(batch_size, _evaluate, **fit_kwargs)


def predict_with_adaptive_batch_size(
    model, batch_size, verbose=1, **fit_kwargs
):
    """Call `model.predict` through `run_with_adaptive_batch_size`.

    # Arguments
        model: The object whose `predict` method is called.
        batch_size: The batch size handed to `run_with_adaptive_batch_size`.
        verbose: Forwarded to `model.predict`. Defaults to 1.
        **fit_kwargs: Forwarded to `run_with_adaptive_batch_size`.

    # Returns
        Whatever `run_with_adaptive_batch_size` returns.
    """

    def _predict(x, validation_data, **kwargs):
        # `validation_data` is accepted but not forwarded to `predict`.
        return model.predict(x, verbose=verbose, **kwargs)

    return run_with_adaptive_batch_size(batch_size, _predict, **fit_kwargs)


def fit_with_adaptive_batch_size(model, batch_size, **fit_kwargs):
    """Call `model.fit` through `run_with_adaptive_batch_size`.

    # Arguments
        model: The object whose `fit` method is called.
        batch_size: The batch size handed to `run_with_adaptive_batch_size`.
        **fit_kwargs: Forwarded to `run_with_adaptive_batch_size`.

    # Returns
        A tuple `(model, history)`, where `history` is the value returned by
        `run_with_adaptive_batch_size`.
    """

    def _fit(**kwargs):
        return model.fit(**kwargs)

    fit_result = run_with_adaptive_batch_size(batch_size, _fit, **fit_kwargs)
    return model, fit_result


def run_with_adaptive_batch_size(batch_size, func, **fit_kwargs):
    """Call `func`, halving the batch size whenever it runs out of memory.

    `func` is called with `validation_data`, `batch_size` and the remaining
    keyword arguments. If it raises one of `oom_exceptions`, the batch size
    is halved and the call is retried; at a batch size of 1 the exception is
    re-raised.

    # Arguments
        batch_size: The batch size for the first attempt. Must be positive.
        func: Callable accepting `validation_data` and `batch_size` as
            keyword arguments, plus everything in `fit_kwargs`.
        **fit_kwargs: Extra keyword arguments for `func`. A
            `validation_data` entry is extracted and passed explicitly
            (`None` when absent).

    # Raises
        ValueError: If no attempt can be made with a positive batch size.

    # Returns
        The value returned by the first successful call to `func`.
    """
    initial_batch_size = batch_size
    validation_data = fit_kwargs.pop("validation_data", None)
    while batch_size > 0:
        try:
            return func(
                validation_data=validation_data,
                batch_size=batch_size,
                **fit_kwargs,
            )
        except oom_exceptions:  # pragma: no cover
            if batch_size == 1:  # pragma: no cover
                # Nothing smaller to try; re-raise the current exception.
                raise  # pragma: no cover
            batch_size //= 2  # pragma: no cover
            print(  # pragma: no cover
                "Not enough memory, reduce batch size to {batch_size}.".format(
                    batch_size=batch_size
                )
            )
    # Reached only when there was never a positive batch size left to try;
    # fail explicitly instead of returning an unbound result.
    raise ValueError(  # pragma: no cover
        "Expected batch_size to be a positive integer, "
        "but received {batch_size}.".format(batch_size=initial_batch_size)
    )


def get_hyperparameter(value, hp, dtype):
    """Return `value`, falling back to `hp` when `value` is None.

    # Arguments
        value: The explicitly provided value, or None.
        hp: The fallback returned when `value` is None.
        dtype: Not used by this function.

    # Returns
        `hp` if `value` is None, otherwise `value`.
    """
    return hp if value is None else value


def add_to_hp(hp, hps, name=None):
    """Add a HyperParameter to a HyperParameters container.

    If `hp` is not a `HyperParameter` instance it is returned unchanged.
    Otherwise the method of `hps` named after the class of `hp` is called
    with the config of `hp`, minus its "conditions" and "name" entries.

    # Arguments
        hp: A keras_tuner HyperParameter, or any other value.
        hps: The object providing a method per HyperParameter class name.
        name: String. If left unspecified, the hp name is used.

    # Returns
        `hp` itself when it is not a HyperParameter, otherwise the result of
        the `hps` method call.
    """
    hp_base_class = keras_tuner.engine.hyperparameters.HyperParameter
    if not isinstance(hp, hp_base_class):
        return hp

    config = hp.get_config()
    target_name = hp.name if name is None else name
    # These two entries are not forwarded; the name is passed explicitly.
    config.pop("conditions")
    config.pop("name")

    register = getattr(hps, hp.__class__.__name__)
    return register(name=target_name, **config)


def serialize_keras_object(obj):  # pragma: no cover
    """Serialize `obj` by delegating to `keras.utils.serialize_keras_object`.

    # Arguments
        obj: The object to serialize.

    # Returns
        The value returned by `keras.utils.serialize_keras_object`.
    """
    serialized = keras.utils.serialize_keras_object(obj)
    return serialized


def deserialize_keras_object(config, module_objects=None, custom_objects=None):
    """Rebuild an object by calling `keras.utils.deserialize_keras_object`.

    # Arguments
        config: The serialized config to deserialize.
        module_objects: Optional lookup forwarded as `module_objects`.
        custom_objects: Optional lookup forwarded as `custom_objects`.

    # Returns
        The value returned by `keras.utils.deserialize_keras_object`.
    """
    # Pass both lookups by keyword. The third positional parameter of the
    # Keras function is `safe_mode`, so a positional `module_objects` would
    # be taken as `safe_mode` and never used as a lookup.
    return keras.utils.deserialize_keras_object(
        config,
        custom_objects=custom_objects,
        module_objects=module_objects,
    )


================================================
FILE: autokeras/utils/utils_test.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from keras_tuner.engine import hyperparameters

from autokeras.utils import utils


def test_validate_num_inputs_error():
    """Three inputs checked against a count of two raise a ValueError."""
    three_inputs = [1, 2, 3]
    with pytest.raises(ValueError) as excinfo:
        utils.validate_num_inputs(three_inputs, 2)

    message = str(excinfo.value)
    assert "Expected 2 elements in the inputs list" in message


def test_get_hyperparameter_with_none_return_hp():
    """A None value makes get_hyperparameter return the fallback hp."""
    fallback = hyperparameters.Choice("hp", [10, 20])
    result = utils.get_hyperparameter(None, fallback, int)
    assert isinstance(result, hyperparameters.Choice)


def test_get_hyperparameter_with_int_return_int():
    """An explicit int value is returned instead of the fallback hp."""
    fallback = hyperparameters.Choice("hp", [10, 20])
    result = utils.get_hyperparameter(10, fallback, int)
    assert isinstance(result, int)
    assert result == 10


def test_get_hyperparameter_with_hp_return_same():
    """Passing a Choice as the value yields a Choice back."""
    provided = hyperparameters.Choice("hp", [10, 30])
    fallback = hyperparameters.Choice("hp", [10, 20])
    result = utils.get_hyperparameter(provided, fallback, int)
    assert isinstance(result, hyperparameters.Choice)


================================================
FILE: benchmark/README.md
================================================
Usage: python benchmark/run.py structured_data_classification report.csv


================================================
FILE: benchmark/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: benchmark/datasets/titanic_test.csv
================================================
sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived
female,15,0,0,7.225,3,?,C,True,1
female,1,0,2,15.7417,3,?,C,False,1
male,20,1,1,15.7417,3,?,C,False,1
female,19,1,1,15.7417,3,?,C,False,1
male,33,0,0,8.05,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,12,1,0,11.2417,3,?,C,False,1
female,14,1,0,11.2417,3,?,C,False,1
female,29,0,0,7.925,3,?,S,True,0
male,28,0,0,8.05,3,?,S,True,0
female,18,0,0,7.775,3,?,S,True,1
female,26,0,0,7.8542,3,?,S,True,1
male,21,0,0,7.8542,3,?,S,True,0
male,41,0,0,7.125,3,?,S,True,0
male,39,0,0,7.925,3,?,S,True,1
male,21,0,0,7.8,3,?,S,True,0
male,28.5,0,0,7.2292,3,?,C,True,0
female,22,0,0,7.75,3,?,S,True,1
male,61,0,0,6.2375,3,?,S,True,0
male,,1,0,15.5,3,?,Q,False,0
male,,0,0,7.8292,3,?,Q,True,0
female,,1,0,15.5,3,?,Q,False,1
male,,0,0,7.7333,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,23,0,0,9.225,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
female,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.8792,3,?,Q,True,1
female,22,0,0,7.775,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.8292,3,?,Q,True,1
male,9,0,1,3.1708,3,?,S,False,1
male,28,0,0,22.525,3,?,S,True,0
male,42,0,1,8.4042,3,?,S,False,0
male,,0,0,7.3125,3,?,S,True,0
female,31,0,0,7.8542,3,?,S,True,0
male,28,0,0,7.8542,3,?,S,True,0
male,32,0,0,7.775,3,?,S,True,1
male,20,0,0,9.225,3,?,S,True,0
female,23,0,0,8.6625,3,?,S,True,0
female,20,0,0,8.6625,3,?,S,True,0
male,20,0,0,8.6625,3,?,S,True,0
male,16,0,0,9.2167,3,?,S,True,0
female,31,0,0,8.6833,3,?,S,True,1
female,,0,0,7.6292,3,?,Q,True,0
male,2,3,1,21.075,3,?,S,False,0
male,6,3,1,21.075,3,?,S,False,0
female,3,3,1,21.075,3,?,S,False,0
female,8,3,1,21.075,3,?,S,False,0
female,29,0,4,21.075,3,?,S,False,0
male,1,4,1,39.6875,3,?,S,False,0
male,7,4,1,39.6875,3,?,S,False,0
male,2,4,1,39.6875,3,?,S,False,0
male,16,4,1,39.6875,3,?,S,False,0
male,14,4,1,39.6875,3,?,S,False,0
female,41,0,5,39.6875,3,?,S,False,0
male,21,0,0,8.6625,3,?,S,True,0
male,19,0,0,14.5,3,?,S,True,0
male,,0,0,8.7125,3,?,C,True,0
male,32,0,0,7.8958,3,?,S,True,0
male,0.75,1,1,13.775,3,?,S,False,0
female,3,1,1,13.775,3,?,S,False,0
female,26,0,2,13.775,3,?,S,False,0
male,,0,0,7,3,?,S,True,0
male,,0,0,7.775,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,21,0,0,7.925,3,?,S,True,0
male,25,0,0,7.925,3,?,S,True,0
male,22,0,0,7.25,3,?,S,True,0
male,25,1,0,7.775,3,?,S,False,1
male,,1,1,22.3583,3,?,C,False,1
female,,1,1,22.3583,3,F E69,C,False,1
female,,0,2,22.3583,3,?,C,False,1
female,,0,0,8.1375,3,?,Q,True,0
male,24,0,0,8.05,3,?,S,True,0
female,28,0,0,7.8958,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,25,1,0,7.775,3,?,S,False,0
female,18,0,0,7.775,3,?,S,True,0
male,32,0,0,8.05,3,E10,S,True,1
male,,0,0,7.8958,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
male,24,0,0,8.6625,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
female,,0,0,8.1125,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,,0,0,7.25,3,?,S,True,0
male,38,0,0,7.8958,3,?,S,True,0
male,21,0,0,8.05,3,?,S,True,0
male,10,4,1,29.125,3,?,Q,False,0
male,4,4,1,29.125,3,?,Q,False,0
male,7,4,1,29.125,3,?,Q,False,0
male,2,4,1,29.125,3,?,Q,False,0
male,8,4,1,29.125,3,?,Q,False,0
female,39,0,5,29.125,3,?,Q,False,0
female,22,0,0,39.6875,3,?,S,True,0
male,35,0,0,7.125,3,?,S,True,0
female,,0,0,7.7208,3,?,Q,True,1
male,,0,0,14.5,3,?,S,True,0
female,,0,0,14.5,3,?,S,True,0
male,50,1,0,14.5,3,?,S,False,0
female,47,1,0,14.5,3,?,S,False,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,7.775,3,?,S,True,0
female,2,1,1,20.2125,3,?,S,False,0
male,18,1,1,20.2125,3,?,S,False,0
female,41,0,2,20.2125,3,?,S,False,0
female,,0,0,8.05,3,?,S,True,1
male,50,0,0,8.05,3,?,S,True,0
male,16,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,1
male,,0,0,24.15,3,?,Q,True,0
male,,0,0,7.2292,3,?,C,True,0
male,25,0,0,7.225,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
male,,0,0,7.7292,3,?,Q,True,0
male,,0,0,7.575,3,?,S,True,0
male,38.5,0,0,7.25,3,?,S,True,0
male,,8,2,69.55,3,?,S,False,0
male,14.5,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
female,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,8,2,69.55,3,?,S,False,0
male,,1,9,69.55,3,?,S,False,0
female,,1,9,69.55,3,?,S,False,0
male,24,0,0,9.325,3,?,S,True,0
female,21,0,0,7.65,3,?,S,True,1
male,39,0,0,7.925,3,?,S,True,0
male,,2,0,21.6792,3,?,C,False,0
male,,2,0,21.6792,3,?,C,False,0
male,,2,0,21.6792,3,?,C,False,0
female,1,1,1,16.7,3,G6,S,False,1
female,24,0,2,16.7,3,G6,S,False,1
female,4,1,1,16.7,3,G6,S,False,1
male,25,0,0,9.5,3,?,S,True,1
male,20,0,0,8.05,3,?,S,True,0
male,24.5,0,0,8.05,3,?,S,True,0
male,,0,0,7.725,3,?,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,29,0,0,9.5,3,?,S,True,1
male,,0,0,15.1,3,?,S,True,0
female,,0,0,7.7792,3,?,Q,True,1
male,,0,0,8.05,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,22,0,0,7.2292,3,?,C,True,0
male,,0,0,8.05,3,?,S,True,0
male,40,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.925,3,?,S,True,0
female,18,0,0,7.4958,3,?,S,True,1
male,4,3,2,27.9,3,?,S,False,0
male,10,3,2,27.9,3,?,S,False,0
female,9,3,2,27.9,3,?,S,False,0
female,2,3,2,27.9,3,?,S,False,0
male,40,1,4,27.9,3,?,S,False,0
female,45,1,4,27.9,3,?,S,False,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,8.6625,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
female,,0,0,7.7333,3,?,Q,True,1
male,19,0,0,7.65,3,F G73,S,True,0
male,30,0,0,8.05,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,32,0,0,8.05,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,33,0,0,8.6625,3,?,C,True,0
female,23,0,0,7.55,3,?,S,True,1
male,21,0,0,8.05,3,?,S,True,0
male,60.5,0,0,?,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
female,22,0,0,9.8375,3,?,S,True,0
male,31,0,0,7.925,3,?,S,True,1
male,27,0,0,8.6625,3,?,S,True,0
female,2,0,1,10.4625,3,G6,S,False,0
female,29,1,1,10.4625,3,G6,S,False,0
male,16,0,0,8.05,3,?,S,True,1
male,44,0,0,7.925,3,?,S,True,1
male,25,0,0,7.05,3,?,S,True,0
male,74,0,0,7.775,3,?,S,True,0
male,14,0,0,9.225,3,?,S,True,1
male,24,0,0,7.7958,3,?,S,True,0
male,25,0,0,7.7958,3,?,S,True,1
male,34,0,0,8.05,3,?,S,True,0
male,0.4167,0,1,8.5167,3,?,C,False,1
male,,1,0,6.4375,3,?,C,False,0
male,,0,0,6.4375,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
female,16,1,1,8.5167,3,?,C,False,1
male,,0,0,8.05,3,?,S,True,0
male,,1,0,16.1,3,?,S,False,0
female,,1,0,16.1,3,?,S,False,1
male,32,0,0,7.925,3,?,S,True,0
male,,0,0,7.75,3,F38,Q,True,0
male,,0,0,7.8958,3,?,S,True,0
male,30.5,0,0,8.05,3,?,S,True,0
male,44,0,0,8.05,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,25,0,0,0,3,?,S,True,1
male,,0,0,7.2292,3,?,C,True,0
male,7,1,1,15.2458,3,?,C,False,1
female,9,1,1,15.2458,3,?,C,False,1
female,29,0,2,15.2458,3,?,C,False,1
male,36,0,0,7.8958,3,?,S,True,0
female,18,0,0,9.8417,3,?,S,True,1
female,63,0,0,9.5875,3,?,S,True,1
male,,1,1,14.5,3,?,S,False,0
male,11.5,1,1,14.5,3,?,S,False,0
male,40.5,0,2,14.5,3,?,S,False,0
female,10,0,2,24.15,3,?,S,False,0
male,36,1,1,24.15,3,?,S,False,0
female,30,1,1,24.15,3,?,S,False,0
male,,0,0,9.5,3,?,S,True,0
male,33,0,0,9.5,3,?,S,True,0
male,28,0,0,9.5,3,?,S,True,0
male,28,0,0,9.5,3,?,S,True,0
male,47,0,0,9,3,?,S,True,0
female,18,2,0,18,3,?,S,False,0
male,31,3,0,18,3,?,S,False,0
male,16,2,0,18,3,?,S,False,0
female,31,1,0,18,3,?,S,False,0
male,22,0,0,7.225,3,?,C,True,1
male,20,0,0,7.8542,3,?,S,True,0
female,14,0,0,7.8542,3,?,S,True,0
male,22,0,0,7.8958,3,?,S,True,0
male,22,0,0,9,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,32.5,0,0,9.5,3,?,S,True,0
female,38,0,0,7.2292,3,?,C,True,1
male,51,0,0,7.75,3,?,S,True,0
male,18,1,0,6.4958,3,?,S,False,0
male,21,1,0,6.4958,3,?,S,False,0
female,47,1,0,7,3,?,S,False,1
male,,0,0,8.7125,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,28.5,0,0,16.1,3,?,S,True,0
male,21,0,0,7.25,3,?,S,True,0
male,27,0,0,8.6625,3,?,S,True,0
male,,0,0,7.25,3,?,S,True,0
male,36,0,0,9.5,3,?,S,True,0
male,27,1,0,14.4542,3,?,C,False,0
female,15,1,0,14.4542,3,?,C,False,1
male,45.5,0,0,7.225,3,?,C,True,0
male,,0,0,7.225,3,?,C,True,0
male,,0,0,14.4583,3,?,C,True,0
female,14.5,1,0,14.4542,3,?,C,False,0
female,,1,0,14.4542,3,?,C,False,0
male,26.5,0,0,7.225,3,?,C,True,0
male,27,0,0,7.225,3,?,C,True,0
male,29,0,0,7.875,3,?,S,True,0


================================================
FILE: benchmark/datasets/titanic_train.csv
================================================
sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone,survived
female,29,0,0,211.3375,1,B5,S,True,1
male,0.9167,1,2,151.55,1,C22 C26,S,False,1
female,2,1,2,151.55,1,C22 C26,S,False,0
male,30,1,2,151.55,1,C22 C26,S,False,0
female,25,1,2,151.55,1,C22 C26,S,False,0
male,48,0,0,26.55,1,E12,S,True,1
female,63,1,0,77.9583,1,D7,S,False,1
male,39,0,0,0.0,1,A36,S,True,0
female,53,2,0,51.4792,1,C101,S,False,1
male,71,0,0,49.5042,1,?,C,True,0
male,47,1,0,227.525,1,C62 C64,C,False,0
female,18,1,0,227.525,1,C62 C64,C,False,1
female,24,0,0,69.3,1,B35,C,True,1
female,26,0,0,78.85,1,?,S,True,1
male,80,0,0,30.0,1,A23,S,True,1
male,,0,0,25.925,1,?,S,True,0
male,24,0,1,247.5208,1,B58 B60,C,False,0
female,50,0,1,247.5208,1,B58 B60,C,False,1
female,32,0,0,76.2917,1,D15,C,True,1
male,36,0,0,75.2417,1,C6,C,True,0
male,37,1,1,52.5542,1,D35,S,False,1
female,47,1,1,52.5542,1,D35,S,False,1
male,26,0,0,30.0,1,C148,C,True,1
female,42,0,0,227.525,1,?,C,True,1
female,29,0,0,221.7792,1,C97,S,True,1
male,25,0,0,26.0,1,?,C,True,0
male,25,1,0,91.0792,1,B49,C,False,1
female,19,1,0,91.0792,1,B49,C,False,1
female,35,0,0,135.6333,1,C99,S,True,1
male,28,0,0,26.55,1,C52,S,True,1
male,45,0,0,35.5,1,T,S,True,0
male,40,0,0,31.0,1,A31,C,True,1
female,30,0,0,164.8667,1,C7,S,True,1
female,58,0,0,26.55,1,C103,S,True,1
male,42,0,0,26.55,1,D22,S,True,0
female,45,0,0,262.375,1,?,C,True,1
female,22,0,1,55.0,1,E33,S,False,1
male,,0,0,26.55,1,?,S,True,1
male,41,0,0,30.5,1,A21,S,True,0
male,48,0,0,50.4958,1,B10,C,True,0
male,,0,0,39.6,1,?,C,True,0
female,44,0,0,27.7208,1,B4,C,True,1
female,59,2,0,51.4792,1,C101,S,False,1
female,60,0,0,76.2917,1,D15,C,True,1
female,41,0,0,134.5,1,E40,C,True,1
male,45,0,0,26.55,1,B38,S,True,0
male,,0,0,31.0,1,?,S,True,0
male,42,0,0,26.2875,1,E24,S,True,1
female,53,0,0,27.4458,1,?,C,True,1
male,36,0,1,512.3292,1,B51 B53 B55,C,False,1
female,58,0,1,512.3292,1,B51 B53 B55,C,False,1
male,33,0,0,5.0,1,B51 B53 B55,S,True,0
male,28,0,0,47.1,1,?,S,True,0
male,17,0,0,47.1,1,?,S,True,0
male,11,1,2,120.0,1,B96 B98,S,False,1
female,14,1,2,120.0,1,B96 B98,S,False,1
male,36,1,2,120.0,1,B96 B98,S,False,1
female,36,1,2,120.0,1,B96 B98,S,False,1
male,49,0,0,26.0,1,?,S,True,0
female,,0,0,27.7208,1,?,C,True,1
male,36,1,0,78.85,1,C46,S,False,0
female,76,1,0,78.85,1,C46,S,False,1
male,46,1,0,61.175,1,E31,S,False,0
female,47,1,0,61.175,1,E31,S,False,1
male,27,1,0,53.1,1,E8,S,False,1
female,33,1,0,53.1,1,E8,S,False,1
female,36,0,0,262.375,1,B61,C,True,1
female,30,0,0,86.5,1,B77,S,True,1
male,45,0,0,29.7,1,A9,C,True,1
female,,0,1,55.0,1,E33,S,False,1
male,,0,0,0.0,1,?,S,True,0
male,27,1,0,136.7792,1,C89,C,False,0
female,26,1,0,136.7792,1,C89,C,False,1
female,22,0,0,151.55,1,?,S,True,1
male,,0,0,52.0,1,A14,S,True,0
male,47,0,0,25.5875,1,E58,S,True,0
female,39,1,1,83.1583,1,E49,C,False,1
male,37,1,1,83.1583,1,E52,C,False,0
female,64,0,2,83.1583,1,E45,C,False,1
female,55,2,0,25.7,1,C101,S,False,1
male,,0,0,26.55,1,?,S,True,0
male,70,1,1,71.0,1,B22,S,False,0
female,36,0,2,71.0,1,B22,S,False,1
female,64,1,1,26.55,1,B26,S,False,1
male,39,1,0,71.2833,1,C85,C,False,0
female,38,1,0,71.2833,1,C85,C,False,1
male,51,0,0,26.55,1,E17,S,True,1
male,27,0,0,30.5,1,?,S,True,1
female,33,0,0,151.55,1,?,S,True,1
male,31,1,0,52.0,1,B71,S,False,0
female,27,1,2,52.0,1,B71,S,False,1
male,31,1,0,57.0,1,B20,S,False,1
female,17,1,0,57.0,1,B20,S,False,1
male,53,1,1,81.8583,1,A34,S,False,1
male,4,0,2,81.8583,1,A34,S,False,1
female,54,1,1,81.8583,1,A34,S,False,1
male,50,1,0,106.425,1,C86,C,False,0
female,27,1,1,247.5208,1,B58 B60,C,False,1
female,48,1,0,106.425,1,C86,C,False,1
female,48,1,0,39.6,1,A16,C,False,1
male,49,1,0,56.9292,1,A20,C,False,1
male,39,0,0,29.7,1,A18,C,True,0
female,23,0,1,83.1583,1,C54,C,False,1
female,38,0,0,227.525,1,C45,C,True,1
female,54,1,0,78.2667,1,D20,C,False,1
female,36,0,0,31.6792,1,A29,C,True,0
male,,0,0,221.7792,1,C95,S,True,0
female,,0,0,31.6833,1,?,S,True,1
female,,0,0,110.8833,1,?,C,True,1
male,36,0,0,26.3875,1,E25,S,True,1
male,30,0,0,27.75,1,C111,C,True,0
female,24,3,2,263.0,1,C23 C25 C27,S,False,1
female,28,3,2,263.0,1,C23 C25 C27,S,False,1
female,23,3,2,263.0,1,C23 C25 C27,S,False,1
male,19,3,2,263.0,1,C23 C25 C27,S,False,0
male,64,1,4,263.0,1,C23 C25 C27,S,False,0
female,60,1,4,263.0,1,C23 C25 C27,S,False,1
female,30,0,0,56.9292,1,E36,C,True,1
male,,0,0,26.55,1,D34,S,True,0
male,50,2,0,133.65,1,?,S,False,1
male,43,1,0,27.7208,1,D40,C,False,1
female,,1,0,133.65,1,?,S,False,1
female,22,0,2,49.5,1,B39,C,False,1
male,60,1,1,79.2,1,B41,C,False,1
female,48,1,1,79.2,1,B41,C,False,1
male,,0,0,0.0,1,B102,S,True,0
male,37,1,0,53.1,1,C123,S,False,0
female,35,1,0,53.1,1,C123,S,False,1
male,47,0,0,38.5,1,E63,S,True,0
female,35,0,0,211.5,1,C130,C,True,1
female,22,0,1,59.4,1,?,C,False,1
female,45,0,1,59.4,1,?,C,False,1
male,24,0,0,79.2,1,B86,C,True,0
male,49,1,0,89.1042,1,C92,C,False,1
female,,1,0,89.1042,1,C92,C,False,1
male,71,0,0,34.6542,1,A5,C,True,0
male,53,0,0,28.5,1,C51,C,True,1
female,19,0,0,30.0,1,B42,S,True,1
male,38,0,1,153.4625,1,C91,S,False,0
female,58,0,1,153.4625,1,C125,S,False,1
male,23,0,1,63.3583,1,D10 D12,C,False,1
female,45,0,1,63.3583,1,D10 D12,C,False,1
male,46,0,0,79.2,1,B82 B84,C,True,0
male,25,1,0,55.4417,1,E50,C,False,1
female,25,1,0,55.4417,1,E50,C,False,1
male,48,1,0,76.7292,1,D33,C,False,1
female,49,1,0,76.7292,1,D33,C,False,1
male,,0,0,42.4,1,?,S,True,0
male,45,1,0,83.475,1,C83,S,False,0
female,35,1,0,83.475,1,C83,S,False,1
male,40,0,0,0.0,1,B94,S,True,0
male,27,0,0,76.7292,1,D49,C,True,1
male,,0,0,30.0,1,D45,S,True,1
female,24,0,0,83.1583,1,C54,C,True,1
male,55,1,1,93.5,1,B69,S,False,0
female,52,1,1,93.5,1,B69,S,False,1
male,42,0,0,42.5,1,B11,S,True,0
male,,0,0,51.8625,1,E46,S,True,0
male,55,0,0,50.0,1,C39,S,True,0
female,16,0,1,57.9792,1,B18,C,False,1
female,44,0,1,57.9792,1,B18,C,False,1
female,51,1,0,77.9583,1,D11,S,False,1
male,42,1,0,52.0,1,?,S,False,0
female,35,1,0,52.0,1,?,S,False,1
male,35,0,0,26.55,1,?,C,True,1
male,38,1,0,90.0,1,C93,S,False,1
male,,0,0,30.6958,1,?,C,True,0
female,35,1,0,90.0,1,C93,S,False,1
female,38,0,0,80.0,1,B28,?,True,1
female,50,0,0,28.7125,1,C49,C,True,0
male,49,0,0,0.0,1,B52 B54 B56,S,True,1
male,46,0,0,26.0,1,?,S,True,0
male,50,0,0,26.0,1,E60,S,True,0
male,32.5,0,0,211.5,1,C132,C,True,0
male,58,0,0,29.7,1,B37,C,True,0
male,41,1,0,51.8625,1,D21,S,False,0
female,,1,0,51.8625,1,D21,S,False,1
male,42,1,0,52.5542,1,D19,S,False,1
female,45,1,0,52.5542,1,D19,S,False,1
male,,0,0,26.55,1,C124,S,True,0
female,39,0,0,211.3375,1,?,S,True,1
female,49,0,0,25.9292,1,D17,S,True,1
female,30,0,0,106.425,1,?,C,True,1
male,35,0,0,512.3292,1,B101,C,True,1
male,,0,0,27.7208,1,?,C,True,0
male,42,0,0,26.55,1,?,S,True,0
female,55,0,0,27.7208,1,?,C,True,1
female,16,0,1,39.4,1,D28,S,False,1
female,51,0,1,39.4,1,D28,S,False,1
male,29,0,0,30.0,1,D6,S,True,0
female,21,0,0,77.9583,1,D9,S,True,1
male,30,0,0,45.5,1,?,S,True,0
female,58,0,0,146.5208,1,B80,C,True,1
female,15,0,1,211.3375,1,B5,S,False,1
male,30,0,0,26.0,1,C106,S,True,0
female,16,0,0,86.5,1,B79,S,True,1
male,,0,0,29.7,1,C47,C,True,1
male,19,1,0,53.1,1,D30,S,False,0
female,18,1,0,53.1,1,D30,S,False,1
female,24,0,0,49.5042,1,C90,C,True,1
male,46,0,0,75.2417,1,C6,C,True,0
male,54,0,0,51.8625,1,E46,S,True,0
male,36,0,0,26.2875,1,E25,S,True,1
male,28,1,0,82.1708,1,?,C,False,0
female,,1,0,82.1708,1,?,C,False,1
male,65,0,0,26.55,1,E38,S,True,0
male,44,2,0,90.0,1,C78,Q,False,0
female,33,1,0,90.0,1,C78,Q,False,1
female,37,1,0,90.0,1,C78,Q,False,1
male,30,1,0,57.75,1,C78,C,False,1
male,55,0,0,30.5,1,C30,S,True,0
male,47,0,0,42.4,1,?,S,True,0
male,37,0,1,29.7,1,C118,C,False,0
female,31,1,0,113.275,1,D36,C,False,1
female,23,1,0,113.275,1,D36,C,False,1
male,58,0,2,113.275,1,D48,C,False,0
female,19,0,2,26.2833,1,D47,S,False,1
male,64,0,0,26.0,1,?,S,True,0
female,39,0,0,108.9,1,C105,C,True,1
male,,0,0,25.7417,1,?,C,True,1
female,22,0,1,61.9792,1,B36,C,False,1
male,65,0,1,61.9792,1,B30,C,False,0
male,28.5,0,0,27.7208,1,D43,C,True,0
male,,0,0,0.0,1,?,S,True,0
male,45.5,0,0,28.5,1,C124,S,True,0
male,23,0,0,93.5,1,B24,S,True,0
male,29,1,0,66.6,1,C2,S,False,0
female,22,1,0,66.6,1,C2,S,False,1
male,18,1,0,108.9,1,C65,C,False,0
female,17,1,0,108.9,1,C65,C,False,1
female,30,0,0,93.5,1,B73,S,True,1
male,52,0,0,30.5,1,C104,S,True,1
male,47,0,0,52.0,1,C110,S,True,0
female,56,0,1,83.1583,1,C50,C,False,1
male,38,0,0,0.0,1,?,S,True,0
male,,0,0,39.6,1,?,S,True,1
male,22,0,0,135.6333,1,?,C,True,0
male,,0,0,227.525,1,?,C,True,0
female,43,0,1,211.3375,1,B3,S,False,1
male,31,0,0,50.4958,1,A24,S,True,0
male,45,0,0,26.55,1,?,S,True,1
male,,0,0,50.0,1,A32,S,True,0
female,33,0,0,27.7208,1,A11,C,True,1
male,46,0,0,79.2,1,?,C,True,0
male,36,0,0,40.125,1,A10,C,True,0
female,33,0,0,86.5,1,B77,S,True,1
male,55,1,0,59.4,1,?,C,False,0
female,54,1,0,59.4,1,?,C,False,1
male,33,0,0,26.55,1,?,S,True,0
male,13,2,2,262.375,1,B57 B59 B63 B66,C,False,1
female,18,2,2,262.375,1,B57 B59 B63 B66,C,False,1
female,21,2,2,262.375,1,B57 B59 B63 B66,C,False,1
male,61,1,3,262.375,1,B57 B59 B63 B66,C,False,0
female,48,1,3,262.375,1,B57 B59 B63 B66,C,False,1
male,,0,0,30.5,1,C106,S,True,1
female,24,0,0,69.3,1,B35,C,True,1
male,,0,0,26.0,1,?,S,True,1
female,35,1,0,57.75,1,C28,C,False,1
female,30,0,0,31.0,1,?,C,True,1
male,34,0,0,26.55,1,?,S,True,1
female,40,0,0,153.4625,1,C125,S,True,1
male,35,0,0,26.2875,1,E24,S,True,1
male,50,1,0,55.9,1,E44,S,False,0
female,39,1,0,55.9,1,E44,S,False,1
male,56,0,0,35.5,1,A26,C,True,1
male,28,0,0,35.5,1,A6,S,True,1
male,56,0,0,26.55,1,?,S,True,0
male,56,0,0,30.6958,1,A7,C,True,0
male,24,1,0,60.0,1,C31,S,False,0
male,,0,0,26.0,1,A19,S,True,0
female,18,1,0,60.0,1,C31,S,False,1
male,24,1,0,82.2667,1,B45,S,False,1
female,23,1,0,82.2667,1,B45,S,False,1
male,6,0,2,134.5,1,E34,C,False,1
male,45,1,1,134.5,1,E34,C,False,1
female,40,1,1,134.5,1,E34,C,False,1
male,57,1,0,146.5208,1,B78,C,False,0
female,,1,0,146.5208,1,B78,C,False,1
male,32,0,0,30.5,1,B50,C,True,1
male,62,0,0,26.55,1,C87,S,True,0
male,54,1,0,55.4417,1,C116,C,False,1
female,43,1,0,55.4417,1,C116,C,False,1
female,52,1,0,78.2667,1,D20,C,False,1
male,,0,0,27.7208,1,?,C,True,0
female,62,0,0,80.0,1,B28,?,True,1
male,67,1,0,221.7792,1,C55 C57,S,False,0
female,63,1,0,221.7792,1,C55 C57,S,False,0
male,61,0,0,32.3208,1,D50,S,True,0
female,48,0,0,25.9292,1,D17,S,True,1
female,18,0,2,79.65,1,E68,S,False,1
male,52,1,1,79.65,1,E67,S,False,0
female,39,1,1,79.65,1,E67,S,False,1
male,48,1,0,52.0,1,C126,S,False,1
female,,1,0,52.0,1,C126,S,False,1
male,49,1,1,110.8833,1,C68,C,False,0
male,17,0,2,110.8833,1,C70,C,False,1
female,39,1,1,110.8833,1,C68,C,False,1
female,,0,0,79.2,1,?,C,True,1
male,31,0,0,28.5375,1,C53,C,True,1
male,40,0,0,27.7208,1,?,C,True,0
male,61,0,0,33.5,1,B19,S,True,0
male,47,0,0,34.0208,1,D46,S,True,0
female,35,0,0,512.3292,1,?,C,True,1
male,64,1,0,75.25,1,D37,C,False,0
female,60,1,0,75.25,1,D37,C,False,1
male,60,0,0,26.55,1,?,S,True,0
male,54,0,1,77.2875,1,D26,S,False,0
male,21,0,1,77.2875,1,D26,S,False,0
female,55,0,0,135.6333,1,C32,C,True,1
female,31,0,2,164.8667,1,C7,S,False,1
male,57,1,1,164.8667,1,?,S,False,0
female,45,1,1,164.8667,1,?,S,False,1
male,50,1,1,211.5,1,C80,C,False,0
male,27,0,2,211.5,1,C82,C,False,0
female,50,1,1,211.5,1,C80,C,False,1
female,21,0,0,26.55,1,?,S,True,1
male,51,0,1,61.3792,1,?,C,False,0
male,21,0,1,61.3792,1,?,C,False,1
male,,0,0,35.0,1,C128,S,True,0
female,31,0,0,134.5,1,E39 E41,C,True,1
male,,0,0,35.5,1,C52,S,True,1
male,62,0,0,26.55,1,?,S,True,0
female,36,0,0,135.6333,1,C32,C,True,1
male,30,1,0,24.0,2,?,C,False,0
female,28,1,0,24.0,2,?,C,False,1
male,30,0,0,13.0,2,?,S,True,0
male,18,0,0,11.5,2,?,S,True,0
male,25,0,0,10.5,2,?,S,True,0
male,34,1,0,26.0,2,?,S,False,0
female,36,1,0,26.0,2,?,S,False,1
male,57,0,0,13.0,2,?,S,True,0
male,18,0,0,11.5,2,?,S,True,0
male,23,0,0,10.5,2,?,S,True,0
female,36,0,0,13.0,2,D,S,True,1
male,28,0,0,10.5,2,?,S,True,0
male,51,0,0,12.525,2,?,S,True,0
male,32,1,0,26.0,2,?,S,False,1
female,19,1,0,26.0,2,?,S,False,1
male,28,0,0,26.0,2,?,S,True,0
male,1,2,1,39.0,2,F4,S,False,1
female,4,2,1,39.0,2,F4,S,False,1
female,12,2,1,39.0,2,F4,S,False,1
female,36,0,3,39.0,2,F4,S,False,1
male,34,0,0,13.0,2,D56,S,True,1
female,19,0,0,13.0,2,?,S,True,1
male,23,0,0,13.0,2,?,S,True,0
male,26,0,0,13.0,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,0
male,27,0,0,13.0,2,?,S,True,0
female,24,0,0,13.0,2,F33,S,True,1
female,15,0,2,39.0,2,?,S,False,1
male,60,1,1,39.0,2,?,S,False,0
female,40,1,1,39.0,2,?,S,False,1
female,20,1,0,26.0,2,?,S,False,1
male,25,1,0,26.0,2,?,S,False,0
female,36,0,0,13.0,2,?,S,True,1
male,25,0,0,13.0,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,0
female,42,0,0,13.0,2,?,S,True,1
male,0.8333,0,2,29.0,2,?,S,False,1
male,26,1,1,29.0,2,?,S,False,1
female,22,1,1,29.0,2,?,S,False,1
female,35,0,0,21.0,2,?,S,True,1
male,,0,0,0.0,2,?,S,True,0
male,19,0,0,13.0,2,?,S,True,0
female,44,1,0,26.0,2,?,S,False,0
male,54,1,0,26.0,2,?,S,False,0
male,52,0,0,13.5,2,?,S,True,0
male,37,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,0
female,25,1,1,30.0,2,?,S,False,1
female,45,0,2,30.0,2,?,S,False,1
male,29,1,0,26.0,2,?,S,False,0
female,28,1,0,26.0,2,?,S,False,1
male,29,0,0,10.5,2,?,S,True,0
male,28,0,0,13.0,2,?,S,True,0
male,24,0,0,10.5,2,?,S,True,1
female,8,0,2,26.25,2,?,S,False,1
male,31,1,1,26.25,2,?,S,False,0
female,31,1,1,26.25,2,?,S,False,1
female,22,0,0,10.5,2,F33,S,True,1
female,30,0,0,13.0,2,?,S,True,0
female,,0,0,21.0,2,?,S,True,0
male,21,0,0,11.5,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
male,8,1,1,36.75,2,?,S,False,1
male,18,0,0,73.5,2,?,S,True,0
female,48,0,2,36.75,2,?,S,False,1
female,28,0,0,13.0,2,?,S,True,1
male,32,0,0,13.0,2,?,S,True,0
male,17,0,0,73.5,2,?,S,True,0
male,29,1,0,27.7208,2,?,C,False,0
female,24,1,0,27.7208,2,?,C,False,1
male,25,0,0,31.5,2,?,S,True,0
male,18,0,0,73.5,2,?,S,True,0
female,18,0,1,23.0,2,?,S,False,1
female,34,0,1,23.0,2,?,S,False,1
male,54,0,0,26.0,2,?,S,True,0
male,8,0,2,32.5,2,?,S,False,1
male,42,1,1,32.5,2,?,S,False,0
female,34,1,1,32.5,2,?,S,False,1
female,27,1,0,13.8583,2,?,C,False,1
female,30,1,0,13.8583,2,?,C,False,1
male,23,0,0,13.0,2,?,S,True,0
male,21,0,0,13.0,2,?,S,True,0
male,18,0,0,13.0,2,?,S,True,0
male,40,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,18,0,0,10.5,2,?,S,True,0
male,36,0,0,13.0,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
female,38,0,0,13.0,2,?,S,True,0
male,35,0,0,26.0,2,?,S,True,0
male,38,1,0,21.0,2,?,S,False,0
male,34,1,0,21.0,2,?,S,False,0
female,34,0,0,13.0,2,?,S,True,1
male,16,0,0,26.0,2,?,S,True,0
male,26,0,0,10.5,2,?,S,True,0
male,47,0,0,10.5,2,?,S,True,0
male,21,1,0,11.5,2,?,S,False,0
male,21,1,0,11.5,2,?,S,False,0
male,24,0,0,13.5,2,?,S,True,0
male,24,0,0,13.0,2,?,S,True,0
male,34,0,0,13.0,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,52,0,0,13.0,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,0.6667,1,1,14.5,2,?,S,False,1
female,24,0,2,14.5,2,?,S,False,1
male,44,0,0,13.0,2,?,S,True,0
female,6,0,1,33.0,2,?,S,False,1
male,28,0,1,33.0,2,?,S,False,0
male,62,0,0,10.5,2,?,S,True,1
male,30,0,0,10.5,2,?,S,True,0
female,7,0,2,26.25,2,?,S,False,1
male,43,1,1,26.25,2,?,S,False,0
female,45,1,1,26.25,2,?,S,False,1
female,24,1,2,65.0,2,?,S,False,1
female,24,1,2,65.0,2,?,S,False,1
male,49,1,2,65.0,2,?,S,False,0
female,48,1,2,65.0,2,?,S,False,1
female,55,0,0,16.0,2,?,S,True,1
male,24,2,0,73.5,2,?,S,False,0
male,32,2,0,73.5,2,?,S,False,0
male,21,2,0,73.5,2,?,S,False,0
female,18,1,1,13.0,2,?,S,False,0
female,20,2,1,23.0,2,?,S,False,1
male,23,2,1,11.5,2,?,S,False,0
male,36,0,0,13.0,2,?,S,True,0
female,54,1,3,23.0,2,?,S,False,1
male,50,0,0,13.0,2,?,S,True,0
male,44,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,21,0,0,73.5,2,?,S,True,0
male,42,0,0,13.0,2,?,S,True,1
male,63,1,0,26.0,2,?,S,False,0
female,60,1,0,26.0,2,?,S,False,0
male,33,0,0,12.275,2,?,S,True,0
female,17,0,0,10.5,2,?,S,True,1
male,42,1,0,27.0,2,?,S,False,0
female,24,2,1,27.0,2,?,S,False,1
male,47,0,0,15.0,2,?,S,True,0
male,24,2,0,31.5,2,?,S,False,0
male,22,2,0,31.5,2,?,S,False,0
male,32,0,0,10.5,2,?,S,True,0
female,23,0,0,13.7917,2,D,C,True,1
male,34,1,0,26.0,2,?,S,False,0
female,24,1,0,26.0,2,?,S,False,1
female,22,0,0,21.0,2,?,S,True,0
female,,0,0,12.35,2,E101,Q,True,1
male,35,0,0,12.35,2,?,Q,True,0
female,45,0,0,13.5,2,?,S,True,1
male,57,0,0,12.35,2,?,Q,True,0
male,,0,0,0.0,2,?,S,True,0
male,31,0,0,10.5,2,?,S,True,0
female,26,1,1,26.0,2,?,S,False,0
male,30,1,1,26.0,2,?,S,False,0
male,,0,0,10.7083,2,?,Q,True,0
female,1,1,2,41.5792,2,?,C,False,1
female,3,1,2,41.5792,2,?,C,False,1
male,25,1,2,41.5792,2,?,C,False,0
female,22,1,2,41.5792,2,?,C,False,1
female,17,0,0,12.0,2,?,C,True,1
female,,0,0,33.0,2,?,S,True,1
female,34,0,0,10.5,2,F33,S,True,1
male,36,0,0,12.875,2,D,C,True,0
male,24,0,0,10.5,2,?,S,True,0
male,61,0,0,12.35,2,?,Q,True,0
male,50,1,0,26.0,2,?,S,False,0
female,42,1,0,26.0,2,?,S,False,1
female,57,0,0,10.5,2,E77,S,True,0
male,,0,0,15.0458,2,D,C,True,0
male,1,0,2,37.0042,2,?,C,False,1
male,31,1,1,37.0042,2,?,C,False,0
female,24,1,1,37.0042,2,?,C,False,1
male,,0,0,15.5792,2,?,C,True,0
male,30,0,0,13.0,2,?,S,True,0
male,40,0,0,16.0,2,?,S,True,0
male,32,0,0,13.5,2,?,S,True,0
male,30,0,0,13.0,2,?,S,True,0
male,46,0,0,26.0,2,?,S,True,0
female,13,0,1,19.5,2,?,S,False,1
female,41,0,1,19.5,2,?,S,False,1
male,19,0,0,10.5,2,?,S,True,1
male,39,0,0,13.0,2,?,S,True,0
male,48,0,0,13.0,2,?,S,True,0
male,70,0,0,10.5,2,?,S,True,0
male,27,0,0,13.0,2,?,S,True,0
male,54,0,0,14.0,2,?,S,True,0
male,39,0,0,26.0,2,?,S,True,0
male,16,0,0,10.5,2,?,S,True,0
male,62,0,0,9.6875,2,?,Q,True,0
male,32.5,1,0,30.0708,2,?,C,False,0
female,14,1,0,30.0708,2,?,C,False,1
male,2,1,1,26.0,2,F2,S,False,1
male,3,1,1,26.0,2,F2,S,False,1
male,36.5,0,2,26.0,2,F2,S,False,0
male,26,0,0,13.0,2,F2,S,True,0
male,19,1,1,36.75,2,?,S,False,0
male,28,0,0,13.5,2,?,S,True,0
male,20,0,0,13.8625,2,D38,C,True,1
female,29,0,0,10.5,2,F33,S,True,1
male,39,0,0,13.0,2,?,S,True,0
male,22,0,0,10.5,2,?,S,True,1
male,,0,0,13.8625,2,?,C,True,1
male,23,0,0,10.5,2,?,S,True,0
male,29,0,0,13.8583,2,?,C,True,1
male,28,0,0,10.5,2,?,S,True,0
male,,0,0,0.0,2,?,S,True,0
female,50,0,1,26.0,2,?,S,False,1
male,19,0,0,10.5,2,?,S,True,0
male,,0,0,15.05,2,?,C,True,0
male,41,0,0,13.0,2,?,S,True,0
female,21,0,1,21.0,2,?,S,False,1
female,19,0,0,26.0,2,?,S,True,1
male,43,0,1,21.0,2,?,S,False,0
female,32,0,0,13.0,2,?,S,True,1
male,34,0,0,13.0,2,?,S,True,0
male,30,0,0,12.7375,2,?,C,True,1
male,27,0,0,15.0333,2,?,C,True,0
female,2,1,1,26.0,2,?,S,False,1
female,8,1,1,26.0,2,?,S,False,1
female,33,0,2,26.0,2,?,S,False,1
male,36,0,0,10.5,2,?,S,True,0
male,34,1,0,21.0,2,?,S,False,0
female,30,3,0,21.0,2,?,S,False,1
female,28,0,0,13.0,2,?,S,True,1
male,23,0,0,15.0458,2,?,C,True,0
male,0.8333,1,1,18.75,2,?,S,False,1
male,3,1,1,18.75,2,?,S,False,1
female,24,2,3,18.75,2,?,S,False,1
female,50,0,0,10.5,2,?,S,True,1
male,19,0,0,10.5,2,?,S,True,0
female,21,0,0,10.5,2,?,S,True,1
male,26,0,0,13.0,2,?,S,True,0
male,25,0,0,13.0,2,?,S,True,0
male,27,0,0,26.0,2,?,S,True,0
female,25,0,1,26.0,2,?,S,False,1
female,18,0,2,13.0,2,?,S,False,1
female,20,0,0,36.75,2,?,S,True,1
female,30,0,0,13.0,2,?,S,True,1
male,59,0,0,13.5,2,?,S,True,0
female,30,0,0,12.35,2,?,Q,True,1
male,35,0,0,10.5,2,?,S,True,0
female,40,0,0,13.0,2,?,S,True,1
male,25,0,0,13.0,2,?,S,True,0
male,41,0,0,15.0458,2,?,C,True,0
male,25,0,0,10.5,2,?,S,True,0
male,18.5,0,0,13.0,2,F,S,True,0
male,14,0,0,65.0,2,?,S,True,0
female,50,0,0,10.5,2,?,S,True,1
male,23,0,0,13.0,2,?,S,True,0
female,28,0,0,12.65,2,?,S,True,1
female,27,0,0,10.5,2,E101,S,True,1
male,29,1,0,21.0,2,?,S,False,0
female,27,1,0,21.0,2,?,S,False,0
male,40,0,0,13.0,2,?,S,True,0
female,31,0,0,21.0,2,?,S,True,1
male,30,1,0,21.0,2,?,S,False,0
male,23,1,0,10.5,2,?,S,False,0
female,31,0,0,21.0,2,?,S,True,1
male,,0,0,0.0,2,?,S,True,0
female,12,0,0,15.75,2,?,S,True,1
female,40,0,0,15.75,2,?,S,True,1
female,32.5,0,0,13.0,2,E101,S,True,1
male,27,1,0,26.0,2,?,S,False,0
female,29,1,0,26.0,2,?,S,False,1
male,2,1,1,23.0,2,?,S,False,1
female,4,1,1,23.0,2,?,S,False,1
female,29,0,2,23.0,2,?,S,False,1
female,0.9167,1,2,27.75,2,?,S,False,1
female,5,1,2,27.75,2,?,S,False,1
male,36,1,2,27.75,2,?,S,False,0
female,33,1,2,27.75,2,?,S,False,1
male,66,0,0,10.5,2,?,S,True,0
male,,0,0,12.875,2,?,S,True,0
male,31,0,0,13.0,2,?,S,True,1
male,,0,0,13.0,2,?,S,True,1
female,26,0,0,13.5,2,?,S,True,1
female,24,0,0,13.0,2,?,S,True,0
male,42,0,0,7.55,3,?,S,True,0
male,13,0,2,20.25,3,?,S,False,0
male,16,1,1,20.25,3,?,S,False,0
female,35,1,1,20.25,3,?,S,False,1
female,16,0,0,7.65,3,?,S,True,1
male,25,0,0,7.65,3,F G63,S,True,1
male,20,0,0,7.925,3,?,S,True,1
female,18,0,0,7.2292,3,?,C,True,1
male,30,0,0,7.25,3,?,S,True,0
male,26,0,0,8.05,3,?,S,True,0
female,40,1,0,9.475,3,?,S,False,0
male,0.8333,0,1,9.35,3,?,S,False,1
female,18,0,1,9.35,3,?,S,False,1
male,26,0,0,18.7875,3,?,C,True,1
male,26,0,0,7.8875,3,?,S,True,0
male,20,0,0,7.925,3,?,S,True,0
male,24,0,0,7.05,3,?,S,True,0
male,25,0,0,7.05,3,?,S,True,0
male,35,0,0,8.05,3,?,S,True,0
male,18,0,0,8.3,3,?,S,True,0
male,32,0,0,22.525,3,?,S,True,0
female,19,1,0,7.8542,3,?,S,False,1
male,4,4,2,31.275,3,?,S,False,0
female,6,4,2,31.275,3,?,S,False,0
female,2,4,2,31.275,3,?,S,False,0
female,17,4,2,7.925,3,?,S,False,1
female,38,4,2,7.775,3,?,S,False,0
female,9,4,2,31.275,3,?,S,False,0
female,11,4,2,31.275,3,?,S,False,0
male,39,1,5,31.275,3,?,S,False,0
male,27,0,0,7.7958,3,?,S,True,1
male,26,0,0,7.775,3,?,S,True,0
female,39,1,5,31.275,3,?,S,False,0
male,20,0,0,7.8542,3,?,S,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,25,1,0,17.8,3,?,S,False,0
female,18,1,0,17.8,3,?,S,False,0
male,24,0,0,7.775,3,?,S,True,0
male,35,0,0,7.05,3,?,S,True,0
male,5,4,2,31.3875,3,?,S,False,0
male,9,4,2,31.3875,3,?,S,False,0
male,3,4,2,31.3875,3,?,S,False,1
male,13,4,2,31.3875,3,?,S,False,0
female,5,4,2,31.3875,3,?,S,False,1
male,40,1,5,31.3875,3,?,S,False,0
male,23,0,0,7.7958,3,?,S,True,1
female,38,1,5,31.3875,3,?,S,False,1
female,45,0,0,7.225,3,?,C,True,1
male,21,0,0,7.225,3,?,C,True,0
male,23,0,0,7.05,3,?,S,True,0
female,17,0,0,14.4583,3,?,C,True,0
male,30,0,0,7.225,3,?,C,True,0
male,23,0,0,7.8542,3,?,S,True,0
female,13,0,0,7.2292,3,?,C,True,1
male,20,0,0,7.225,3,?,C,True,0
male,32,1,0,15.85,3,?,S,False,0
female,33,3,0,15.85,3,?,S,False,1
female,0.75,2,1,19.2583,3,?,C,False,1
female,0.75,2,1,19.2583,3,?,C,False,1
female,5,2,1,19.2583,3,?,C,False,1
female,24,0,3,19.2583,3,?,C,False,1
female,18,0,0,8.05,3,?,S,True,1
male,40,0,0,7.225,3,?,C,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,20,0,0,7.2292,3,?,C,True,1
female,18,0,1,14.4542,3,?,C,False,0
female,45,0,1,14.4542,3,?,C,False,0
female,27,0,0,7.8792,3,?,Q,True,0
male,22,0,0,8.05,3,?,S,True,0
male,19,0,0,8.05,3,?,S,True,0
male,26,0,0,7.775,3,?,S,True,0
male,22,0,0,9.35,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,20,0,0,4.0125,3,?,C,True,0
male,32,0,0,56.4958,3,?,S,True,1
male,21,0,0,7.775,3,?,S,True,0
male,18,0,0,7.75,3,?,S,True,0
male,26,0,0,7.8958,3,?,S,True,0
male,6,1,1,15.2458,3,?,C,False,0
female,9,1,1,15.2458,3,?,C,False,0
male,,0,0,7.225,3,?,C,True,0
female,,0,2,15.2458,3,?,C,False,0
female,,0,2,7.75,3,?,Q,False,0
male,40,1,1,15.5,3,?,Q,False,0
female,32,1,1,15.5,3,?,Q,False,0
male,21,0,0,16.1,3,?,S,True,0
female,22,0,0,7.725,3,?,Q,True,1
female,20,0,0,7.8542,3,?,S,True,0
male,29,1,0,7.0458,3,?,S,False,0
male,22,1,0,7.25,3,?,S,False,0
male,22,0,0,7.7958,3,?,S,True,0
male,35,0,0,8.05,3,?,S,True,0
female,18.5,0,0,7.2833,3,?,Q,True,0
male,21,0,0,7.8208,3,?,Q,True,1
male,19,0,0,6.75,3,?,Q,True,0
female,18,0,0,7.8792,3,?,Q,True,0
female,21,0,0,8.6625,3,?,S,True,0
female,30,0,0,8.6625,3,?,S,True,0
male,18,0,0,8.6625,3,?,S,True,0
male,38,0,0,8.6625,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
male,17,0,0,8.6625,3,?,S,True,0
female,21,0,0,7.75,3,?,Q,True,0
male,21,0,0,7.75,3,?,Q,True,0
male,21,0,0,8.05,3,?,S,True,0
male,,1,0,14.4583,3,?,C,False,0
female,,1,0,14.4583,3,?,C,False,0
male,28,0,0,7.7958,3,?,S,True,0
male,24,0,0,7.8542,3,?,S,True,0
female,16,0,0,7.75,3,?,Q,True,1
female,37,0,0,7.75,3,?,Q,True,0
male,28,0,0,7.25,3,?,S,True,0
male,24,0,0,8.05,3,?,S,True,0
male,21,0,0,7.7333,3,?,Q,True,0
male,32,0,0,56.4958,3,?,S,True,1
male,29,0,0,8.05,3,?,S,True,0
male,26,1,0,14.4542,3,?,C,False,0
male,18,1,0,14.4542,3,?,C,False,0
male,20,0,0,7.05,3,?,S,True,0
male,18,0,0,8.05,3,?,S,True,1
male,24,0,0,7.25,3,?,Q,True,0
male,36,0,0,7.4958,3,?,S,True,0
male,24,0,0,7.4958,3,?,S,True,0
male,31,0,0,7.7333,3,?,Q,True,0
male,31,0,0,7.75,3,?,Q,True,0
female,22,0,0,7.75,3,?,Q,True,1
female,30,0,0,7.6292,3,?,Q,True,0
male,70.5,0,0,7.75,3,?,Q,True,0
male,43,0,0,8.05,3,?,S,True,0
male,35,0,0,7.8958,3,?,S,True,0
male,27,0,0,7.8958,3,?,S,True,0
male,19,0,0,7.8958,3,?,S,True,0
male,30,0,0,8.05,3,?,S,True,0
male,9,1,1,15.9,3,?,S,False,1
male,3,1,1,15.9,3,?,S,False,1
female,36,0,2,15.9,3,?,S,False,1
male,59,0,0,7.25,3,?,S,True,0
male,19,0,0,8.1583,3,?,S,True,0
female,17,0,1,16.1,3,?,S,False,1
male,44,0,1,16.1,3,?,S,False,0
male,17,0,0,8.6625,3,?,S,True,0
male,22.5,0,0,7.225,3,?,C,True,0
male,45,0,0,8.05,3,?,S,True,1
female,22,0,0,10.5167,3,?,S,True,0
male,19,0,0,10.1708,3,?,S,True,0
female,30,0,0,6.95,3,?,Q,True,1
male,29,0,0,7.75,3,?,Q,True,1
male,0.3333,0,2,14.4,3,?,S,False,0
male,34,1,1,14.4,3,?,S,False,0
female,28,1,1,14.4,3,?,S,False,0
male,27,0,0,7.8958,3,?,S,True,0
male,25,0,0,7.8958,3,?,S,True,0
male,24,2,0,24.15,3,?,S,False,0
male,22,0,0,8.05,3,?,S,True,0
male,21,2,0,24.15,3,?,S,False,0
male,17,2,0,8.05,3,?,S,False,0
male,,1,0,16.1,3,?,S,False,0
female,,1,0,16.1,3,?,S,False,1
male,36.5,1,0,17.4,3,?,S,False,1
female,36,1,0,17.4,3,?,S,False,1
male,30,0,0,9.5,3,?,S,True,1
male,16,0,0,9.5,3,?,S,True,0
male,1,1,2,20.575,3,?,S,False,1
female,0.1667,1,2,20.575,3,?,S,False,1
male,26,1,2,20.575,3,?,S,False,0
female,33,1,2,20.575,3,?,S,False,1
male,25,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,22,0,0,7.25,3,?,S,True,0
male,36,0,0,7.25,3,?,S,True,0
female,19,0,0,7.8792,3,?,Q,True,1
male,17,0,0,7.8958,3,?,S,True,0
male,42,0,0,8.6625,3,?,S,True,0
male,43,0,0,7.8958,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,0
male,32,0,0,7.75,3,?,Q,True,0
male,19,0,0,8.05,3,?,S,True,1
female,30,0,0,12.475,3,?,S,True,1
female,24,0,0,7.75,3,?,Q,True,0
female,23,0,0,8.05,3,?,S,True,1
male,33,0,0,7.8958,3,?,C,True,0
male,65,0,0,7.75,3,?,Q,True,0
male,24,0,0,7.55,3,?,S,True,1
male,23,1,0,13.9,3,?,S,False,0
female,22,1,0,13.9,3,?,S,False,1
male,18,0,0,7.775,3,?,S,True,0
male,16,0,0,7.775,3,?,S,True,0
male,45,0,0,6.975,3,?,S,True,0
male,,0,0,7.225,3,?,C,True,0
male,39,0,2,7.2292,3,?,C,False,0
male,17,1,1,7.2292,3,?,C,False,0
male,15,1,1,7.2292,3,?,C,False,0
male,47,0,0,7.25,3,?,S,True,0
female,5,0,0,12.475,3,?,S,True,1
male,,0,0,7.225,3,?,C,True,0
male,40.5,0,0,15.1,3,?,S,True,0
male,40.5,0,0,7.75,3,?,Q,True,0
male,,0,0,7.05,3,?,S,True,1
male,18,0,0,7.7958,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,6.95,3,?,Q,True,0
male,26,0,0,7.8792,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,56.4958,3,?,S,True,1
female,21,2,2,34.375,3,?,S,False,0
female,9,2,2,34.375,3,?,S,False,0
male,,0,0,8.05,3,?,S,True,0
male,18,2,2,34.375,3,?,S,False,0
male,16,1,3,34.375,3,?,S,False,0
female,48,1,3,34.375,3,?,S,False,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.25,3,?,S,True,0
male,25,0,0,7.7417,3,?,Q,True,0
male,,0,0,14.5,3,?,S,True,0
male,,0,0,7.8958,3,?,C,True,0
male,22,0,0,8.05,3,?,S,True,0
female,16,0,0,7.7333,3,?,Q,True,1
female,,0,0,7.75,3,?,Q,True,1
male,9,0,2,20.525,3,?,S,False,1
male,33,1,1,20.525,3,?,S,False,0
male,41,0,0,7.85,3,?,S,True,0
female,31,1,1,20.525,3,?,S,False,1
male,38,0,0,7.05,3,?,S,True,0
male,9,5,2,46.9,3,?,S,False,0
male,1,5,2,46.9,3,?,S,False,0
male,11,5,2,46.9,3,?,S,False,0
female,10,5,2,46.9,3,?,S,False,0
female,16,5,2,46.9,3,?,S,False,0
male,14,5,2,46.9,3,?,S,False,0
male,40,1,6,46.9,3,?,S,False,0
female,43,1,6,46.9,3,?,S,False,0
male,51,0,0,8.05,3,?,S,True,0
male,32,0,0,8.3625,3,?,S,True,0
male,,0,0,8.05,3,?,S,True,0
male,20,0,0,9.8458,3,?,S,True,0
male,37,2,0,7.925,3,?,S,False,0
male,28,2,0,7.925,3,?,S,False,0
male,19,0,0,7.775,3,?,S,True,0
female,24,0,0,8.85,3,?,S,True,0
female,17,0,0,7.7333,3,?,Q,True,0
male,,1,0,19.9667,3,?,S,False,0
male,,1,0,19.9667,3,?,S,False,0
male,28,1,0,15.85,3,?,S,False,0
female,24,1,0,15.85,3,?,S,False,1
male,20,0,0,9.5,3,?,S,True,0
male,23.5,0,0,7.2292,3,?,C,True,0
male,41,2,0,14.1083,3,?,S,False,0
male,26,1,0,7.8542,3,?,S,False,0
male,21,0,0,7.8542,3,?,S,True,0
female,45,1,0,14.1083,3,?,S,False,1
female,,0,0,7.55,3,?,S,True,0
male,25,0,0,7.25,3,?,S,True,0
male,,0,0,6.8583,3,?,Q,True,0
male,11,0,0,18.7875,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,1
male,27,0,0,6.975,3,?,S,True,1
male,,0,0,56.4958,3,?,S,True,1
female,18,0,0,6.75,3,?,Q,True,0
female,26,0,0,7.925,3,?,S,True,1
female,23,0,0,7.925,3,?,S,True,0
female,22,0,0,8.9625,3,?,S,True,1
male,28,0,0,7.8958,3,?,S,True,0
female,28,0,0,7.775,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,0
female,2,0,1,12.2875,3,?,S,False,1
female,22,1,1,12.2875,3,?,S,False,1
male,43,0,0,6.45,3,?,S,True,0
male,28,0,0,22.525,3,?,S,True,0
female,27,0,0,7.925,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,0
female,,0,0,8.05,3,?,S,True,1
male,42,0,0,7.65,3,F G63,S,True,0
male,,0,0,7.8875,3,?,S,True,1
male,30,0,0,7.2292,3,?,C,True,0
male,,0,0,7.8958,3,?,S,True,0
female,27,1,0,7.925,3,?,S,False,0
female,25,1,0,7.925,3,?,S,False,0
male,,0,0,7.8958,3,?,S,True,0
male,29,0,0,7.8958,3,?,C,True,1
male,21,0,0,7.7958,3,?,S,True,1
male,,0,0,7.05,3,?,S,True,0
male,20,0,0,7.8542,3,?,S,True,0
male,48,0,0,7.8542,3,?,S,True,0
male,17,1,0,7.0542,3,?,S,False,0
female,,0,0,7.75,3,?,Q,True,1
male,,0,0,8.1125,3,?,S,True,1
male,34,0,0,6.4958,3,?,S,True,0
male,26,0,0,7.775,3,?,S,True,1
male,22,0,0,7.7958,3,?,S,True,0
male,33,0,0,8.6542,3,?,S,True,0
male,31,0,0,7.775,3,?,S,True,0
male,29,0,0,7.8542,3,?,S,True,0
male,4,1,1,11.1333,3,?,S,False,1
female,1,1,1,11.1333,3,?,S,False,1
male,49,0,0,0.0,3,?,S,True,0
male,33,0,0,7.775,3,?,S,True,0
male,19,0,0,0.0,3,?,S,True,0
female,27,0,2,11.1333,3,?,S,False,1
male,,1,2,23.45,3,?,S,False,0
female,,1,2,23.45,3,?,S,False,0
male,,1,2,23.45,3,?,S,False,0
female,,1,2,23.45,3,?,S,False,0
male,23,0,0,7.8958,3,?,S,True,0
male,32,0,0,7.8542,3,?,S,True,1
male,27,0,0,7.8542,3,?,S,True,0
female,20,1,0,9.825,3,?,S,False,0
female,21,1,0,9.825,3,?,S,False,0
male,32,0,0,7.925,3,?,S,True,1
male,17,0,0,7.125,3,?,S,True,0
male,21,0,0,8.4333,3,?,S,True,0
male,30,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.7958,3,?,S,True,1
male,33,0,0,7.8542,3,?,S,True,0
male,22,0,0,7.5208,3,?,S,True,0
female,4,0,1,13.4167,3,?,C,False,1
male,39,0,1,13.4167,3,?,C,False,1
male,,0,0,7.2292,3,?,C,True,0
male,18.5,0,0,7.2292,3,?,C,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.25,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,1
female,,0,0,7.75,3,?,Q,True,1
male,34.5,0,0,7.8292,3,?,Q,True,0
male,44,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,1
male,,1,0,14.4542,3,?,C,False,0
female,,1,0,14.4542,3,?,C,False,0
male,,1,0,7.75,3,?,Q,False,0
male,,1,0,7.75,3,?,Q,False,0
male,,0,0,7.7375,3,?,Q,True,0
female,22,2,0,8.6625,3,?,S,False,0
male,26,2,0,8.6625,3,?,S,False,0
female,4,0,2,22.025,3,?,S,False,1
male,29,3,1,22.025,3,?,S,False,1
female,26,1,1,22.025,3,?,S,False,1
female,1,1,1,12.1833,3,?,S,False,0
male,18,1,1,7.8542,3,?,S,False,0
female,36,0,2,12.1833,3,?,S,False,0
male,,0,0,7.8958,3,?,C,True,0
male,25,0,0,7.2292,3,F E57,C,True,1
male,,0,0,7.225,3,?,C,True,0
female,37,0,0,9.5875,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,56.4958,3,?,S,True,1
male,,0,0,56.4958,3,?,S,True,0
female,22,0,0,7.25,3,?,S,True,1
male,,0,0,7.75,3,?,Q,True,0
male,26,0,0,56.4958,3,?,S,True,1
male,29,0,0,9.4833,3,?,S,True,0
male,29,0,0,7.775,3,?,S,True,0
male,22,0,0,7.775,3,?,S,True,0
male,22,0,0,7.225,3,?,C,True,1
male,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,3,1,25.4667,3,?,S,False,0
female,,0,4,25.4667,3,?,S,False,0
male,32,0,0,7.925,3,?,S,True,0
male,34.5,0,0,6.4375,3,?,C,True,0
female,,1,0,15.5,3,?,Q,False,0
male,,1,0,15.5,3,?,Q,False,0
male,36,0,0,0.0,3,?,S,True,0
male,39,0,0,24.15,3,?,S,True,0
male,24,0,0,9.5,3,?,S,True,0
female,25,0,0,7.775,3,?,S,True,0
female,45,0,0,7.75,3,?,S,True,0
male,36,1,0,15.55,3,?,S,False,0
female,30,1,0,15.55,3,?,S,False,0
male,20,1,0,7.925,3,?,S,False,1
male,,0,0,7.8792,3,?,Q,True,0
male,28,0,0,56.4958,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
male,30,1,0,16.1,3,?,S,False,0
female,26,1,0,16.1,3,?,S,False,0
male,,0,0,7.8792,3,?,S,True,0
male,20.5,0,0,7.25,3,?,S,True,0
male,27,0,0,8.6625,3,?,S,True,1
male,51,0,0,7.0542,3,?,S,True,0
female,23,0,0,7.8542,3,?,S,True,1
male,32,0,0,7.5792,3,?,S,True,1
male,,0,0,7.8958,3,?,S,True,0
male,,0,0,7.55,3,?,S,True,0
female,,0,0,7.75,3,?,Q,True,1
male,24,0,0,7.1417,3,?,S,True,1
male,22,0,0,7.125,3,?,S,True,0
female,,0,0,7.8792,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,8.05,3,?,S,True,0
male,29,0,0,7.925,3,?,S,True,0
male,,0,0,7.2292,3,?,C,True,1
female,30.5,0,0,7.75,3,?,Q,True,0
female,,0,0,7.7375,3,?,Q,True,1
male,,0,0,7.2292,3,F E46,C,True,0
male,35,0,0,7.8958,3,?,C,True,0
male,33,0,0,7.8958,3,?,S,True,0
female,,0,0,7.225,3,?,C,True,1
male,,0,0,7.8958,3,?,C,True,0
female,,0,0,7.75,3,?,Q,True,1
male,,0,0,7.75,3,?,Q,True,1
female,,2,0,23.25,3,?,Q,False,1
female,,2,0,23.25,3,?,Q,False,1
male,,2,0,23.25,3,?,Q,False,1
female,,0,0,7.7875,3,?,Q,True,1
male,,0,0,15.5,3,?,Q,True,0
female,,0,0,7.8792,3,?,Q,True,1
female,15,0,0,8.0292,3,?,Q,True,1
female,35,0,0,7.75,3,?,Q,True,0
male,,0,0,7.75,3,?,Q,True,0
male,24,1,0,16.1,3,?,S,False,0
female,19,1,0,16.1,3,?,S,False,0
female,,0,0,7.75,3,?,Q,True,0
female,,0,0,8.05,3,?,S,True,0
female,,0,0,8.05,3,?,S,True,0
male,55.5,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,21,0,0,7.775,3,?,S,True,1
male,,0,0,8.05,3,?,S,True,0
male,24,0,0,7.8958,3,?,S,True,0
male,21,0,0,7.8958,3,?,S,True,0
male,28,0,0,7.8958,3,?,S,True,0
male,,0,0,7.8958,3,?,S,True,0
female,,0,0,7.8792,3,?,Q,True,1
male,25,0,0,7.65,3,F G73,S,True,0
male,6,0,1,12.475,3,E121,S,False,1
female,27,0,1,12.475,3,E121,S,False,1
male,,0,0,8.05,3,?,S,True,0
female,,1,0,24.15,3,?,Q,False,1
male,,1,0,24.15,3,?,Q,False,0
male,,0,0,8.4583,3,?,Q,True,0
male,34,0,0,8.05,3,?,S,True,0
male,,0,0,7.75,3,?,Q,True,0
male,,0,0,7.775,3,?,S,True,1
male,,1,1,15.2458,3,?,C,False,1
male,,1,1,15.2458,3,?,C,False,1
female,,0,2,15.2458,3,?,C,False,1
female,,0,0,7.2292,3,?,C,True,1
male,,0,0,8.05,3,?,S,True,0
female,,0,0,7.7333,3,?,Q,True,1
female,24,0,0,7.75,3,?,Q,True,1
male,,0,0,8.05,3,?,S,True,0
female,,1,0,15.5,3,?,Q,False,1
female,,1,0,15.5,3,?,Q,False,1
female,,0,0,15.5,3,?,Q,True,1
male,18,0,0,7.75,3,?,S,True,0
male,22,0,0,7.8958,3,?,S,True,0


================================================
FILE: benchmark/experiments/__init__.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from benchmark.experiments import structured_data


def get_experiments(task_name):
    """Return the benchmark experiments registered for a task.

    Args:
        task_name: String. Name of the benchmark task.

    Returns:
        A list with one new instance each of the Titanic, Iris and Wine
        experiments when `task_name` is "structured_data_classification";
        `None` for any other task name.
    """
    if task_name != "structured_data_classification":
        return None

    experiment_classes = (
        structured_data.Titanic,
        structured_data.Iris,
        structured_data.Wine,
    )
    return [experiment_cls() for experiment_cls in experiment_classes]


================================================
FILE: benchmark/experiments/experiment.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
import timeit


class Experiment(object):
    """Base class for a benchmark experiment.

    Subclasses supply the data through `load_data()` and the model through
    `get_auto_model()`. `run()` fits and evaluates the model one or more
    times and removes the working directory after every repetition.

    Args:
        name: Name identifying the experiment.
        tmp_dir: Path of the working directory deleted by `tear_down()`.
            Defaults to "tmp_dir".
    """

    def __init__(self, name, tmp_dir="tmp_dir"):
        self.name = name
        self.tmp_dir = tmp_dir

    def get_auto_model(self):
        """Build the model to benchmark. Must be overridden by subclasses."""
        raise NotImplementedError

    @staticmethod
    def load_data():
        """Load the data as `((x_train, y_train), (x_test, y_test))`.

        Must be overridden by subclasses.
        """
        raise NotImplementedError

    def run_once(self):
        """Fit the model once and evaluate it on the test split.

        Returns:
            A tuple `(total_time, accuracy)`. `total_time` is the wall-clock
            duration of the `fit()` call in seconds. `accuracy` is the second
            value returned by `evaluate()` (presumably the accuracy metric;
            this depends on how the model is compiled).
        """
        (x_train, y_train), (x_test, y_test) = self.load_data()
        auto_model = self.get_auto_model()

        # Only the fit call is timed; data loading, model construction and
        # evaluation are excluded.
        start_time = timeit.default_timer()
        auto_model.fit(x_train, y_train)
        stop_time = timeit.default_timer()

        accuracy = auto_model.evaluate(x_test, y_test)[1]
        total_time = stop_time - start_time

        return total_time, accuracy

    def run(self, repeat_times=1):
        """Run the experiment `repeat_times` times.

        The working directory is removed after every repetition, including
        one that fails, so a crashed run does not leave stale state behind
        for the next run.

        Args:
            repeat_times: Int. Number of repetitions. Defaults to 1.

        Returns:
            A tuple `(total_times, metric_values)` of two lists holding the
            fit time and the metric value of each repetition, in order.
        """
        total_times = []
        metric_values = []
        for _ in range(repeat_times):
            try:
                total_time, metric = self.run_once()
            finally:
                self.tear_down()
            total_times.append(total_time)
            metric_values.append(metric)
        return total_times, metric_values

    def tear_down(self):
        """Delete the working directory, if it exists."""
        try:
            shutil.rmtree(self.tmp_dir)
        except FileNotFoundError:
            # Nothing was written to the working directory (or it has been
            # removed already), so there is nothing to clean up.
            pass


================================================
FILE: benchmark/experiments/image.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from keras import datasets

import autokeras as ak
from benchmark.experiments import experiment


class ImageClassifierExperiment(experiment.Experiment):
    """Experiment whose model is an `ak.ImageClassifier`."""

    def get_auto_model(self):
        """Create an image classifier that works in `self.tmp_dir`.

        The classifier is configured with `max_trials=10` and
        `overwrite=True`.
        """
        classifier = ak.ImageClassifier(
            max_trials=10,
            directory=self.tmp_dir,
            overwrite=True,
        )
        return classifier


class MNIST(ImageClassifierExperiment):
    """Image classification experiment named "MNIST"."""

    def __init__(self):
        super().__init__(name="MNIST")

    @staticmethod
    def load_data():
        """Return the result of `keras.datasets.mnist.load_data()`."""
        mnist_splits = datasets.mnist.load_data()
        return mnist_splits


class CIFAR10(ImageClassifierExperiment):
    """Image classification experiment named "CIFAR10"."""

    def __init__(self):
        super().__init__(name="CIFAR10")

    @staticmethod
    def load_data():
        """Return the result of `keras.datasets.cifar10.load_data()`."""
        cifar10_splits = datasets.cifar10.load_data()
        return cifar10_splits


================================================
FILE: benchmark/experiments/structured_data.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import keras
import numpy as np
import pandas as pd
import sklearn

import autokeras as ak
from benchmark.experiments import experiment


class StructuredDataClassifierExperiment(experiment.Experiment):
    """Experiment whose auto model is an `ak.StructuredDataClassifier`."""

    def get_auto_model(self):
        """Create the structured-data classifier used by this experiment.

        Returns:
            An `ak.StructuredDataClassifier` built with `max_trials=10`, using
            `self.tmp_dir` as its directory and `overwrite=True`.
        """
        classifier_kwargs = {
            "max_trials": 10,
            "directory": self.tmp_dir,
            "overwrite": True,
        }
        return ak.StructuredDataClassifier(**classifier_kwargs)


class Titanic(StructuredDataClassifierExperiment):
    """Structured-data classification experiment named "Titanic"."""

    def __init__(self):
        super().__init__(name="Titanic")

    @staticmethod
    def load_data():
        """Fetch the Titanic train/eval CSV files with `keras.utils.get_file`.

        Returns:
            `((train_path, "survived"), (eval_path, "survived"))`, where each
            path is whatever `keras.utils.get_file` returned for that file.
        """
        label = "survived"
        # (cache file name, download URL); the train file is fetched first.
        sources = (
            (
                "titanic_train.csv",
                "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
            ),
            (
                "titanic_eval.csv",
                "https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
            ),
        )
        train_csv, eval_csv = [
            keras.utils.get_file(fname, url) for fname, url in sources
        ]
        return (train_csv, label), (eval_csv, label)


class Iris(StructuredDataClassifierExperiment):
    """Structured-data classification experiment named "Iris"."""

    def __init__(self):
        super().__init__(name="Iris")

    @staticmethod
    def load_data():
        """Fetch the Iris train/test CSV files with `keras.utils.get_file`.

        Returns:
            `((train_path, "virginica"), (test_path, "virginica"))`, where
            each path is whatever `keras.utils.get_file` returned.
        """
        label = "virginica"
        # (cache file name, download URL); the train file is fetched first.
        sources = (
            (
                "iris_train.csv",
                "https://storage.googleapis.com/"
                "download.tensorflow.org/data/iris_training.csv",
            ),
            (
                "iris_test.csv",
                "https://storage.googleapis.com/"
                "download.tensorflow.org/data/iris_test.csv",
            ),
        )
        train_csv, test_csv = [
            keras.utils.get_file(fname, url) for fname, url in sources
        ]
        return (train_csv, label), (test_csv, label)


class Wine(StructuredDataClassifierExperiment):
    """Structured-data classification experiment named "Wine"."""

    def __init__(self):
        super().__init__(name="Wine")

    @staticmethod
    def load_data():
        """Download the UCI wine data file and split it 80/20.

        The file is read as a header-less CSV and its rows are shuffled with
        a fixed `random_state=5`. Column 0 is returned in the target position
        and the remaining columns in the feature position.

        Returns:
            `((x_train, y_train), (x_test, y_test))` as pandas objects. The
            train part holds the first `int(0.8 * n_rows)` shuffled rows and
            the test part holds the remaining rows.
        """
        csv_path = keras.utils.get_file(
            "wine.csv",
            "https://archive.ics.uci.edu/ml/"
            "machine-learning-databases/wine/wine.data",
        )

        shuffled = pd.read_csv(csv_path, header=None).sample(
            frac=1, random_state=5
        )
        n_train = int(shuffled.shape[0] * 0.8)
        train_rows = shuffled.iloc[:n_train]
        test_rows = shuffled.iloc[n_train:]

        train_pair = (train_rows.iloc[:, 1:], train_rows.iloc[:, 0])
        test_pair = (test_rows.iloc[:, 1:], test_rows.iloc[:, 0])
        return train_pair, test_pair


class StructuredDataRegressorExperiment(experiment.Experiment):
    """Experiment whose auto model is an `ak.StructuredDataRegressor`."""

    def get_auto_model(self):
        """Create the structured-data regressor used by this experiment.

        Returns:
            An `ak.StructuredDataRegressor` built with `max_trials=10`, using
            `self.tmp_dir` as its directory and `overwrite=True`.
        """
        regressor_kwargs = {
            "max_trials": 10,
            "directory": self.tmp_dir,
            "overwrite": True,
        }
        return ak.StructuredDataRegressor(**regressor_kwargs)


class CaliforniaHousing(StructuredDataRegressorExperiment):
    """Structured-data regression experiment on California housing data."""

    def __init__(self, *args, **kwargs):
        """Initialise the experiment, defaulting its name.

        The sibling experiments in this module all pass `name=` to the base
        constructor; this class did not. When called without positional
        arguments and without an explicit `name`, the name defaults to
        "CaliforniaHousing". Any arguments that are supplied are forwarded
        to the base constructor untouched.
        """
        if not args:
            kwargs.setdefault("name", "CaliforniaHousing")
        super().__init__(*args, **kwargs)

    @staticmethod
    def load_data():
        """Load the California housing data and split it 80/20.

        Returns:
            `((x_train, y_train), (x_test, y_test))` as produced by
            `train_test_split` with `test_size=0.2` and `random_state=42`.
        """
        # A bare `import sklearn` does not guarantee that the `datasets` and
        # `model_selection` submodules are loaded, so import them explicitly
        # instead of relying on attribute access on the package.
        from sklearn import datasets as sk_datasets
        from sklearn import model_selection

        house_dataset = sk_datasets.fetch_california_housing()
        (
            x_train,
            x_test,
            y_train,
            y_test,
        ) = model_selection.train_test_split(
            house_dataset.data,
            np.array(house_dataset.target),
            test_size=0.2,
            random_state=42,
        )
        return (x_train, y_train), (x_test, y_test)


================================================
FILE: benchmark/experiments/text.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import keras
import numpy as np
from sklearn.datasets import load_files

import autokeras as ak
from benchmark.experiments import experiment


class IMDB(experiment.Experiment):
    """Text classification experiment registered under the name "IMDB"."""

    def __init__(self):
        super().__init__(name="IMDB")

    def get_auto_model(self):
        """Create the text classifier used by this experiment.

        Returns:
            An `ak.TextClassifier` built with `max_trials=10`, using
            `self.tmp_dir` as its directory and `overwrite=True`.
        """
        classifier_kwargs = {
            "max_trials": 10,
            "directory": self.tmp_dir,
            "overwrite": True,
        }
        return ak.TextClassifier(**classifier_kwargs)

    @staticmethod
    def load_data():
        """Download the aclImdb archive and load its train/test folders.

        The archive is fetched with `keras.utils.get_file(..., extract=True)`.
        The data is then read from an `aclImdb` directory located next to the
        path that call returns, restricted to the "pos" and "neg" categories.
        Only the train split is shuffled.

        Returns:
            `((x_train, y_train), (x_test, y_test))` as numpy arrays built
            from the `data` and `target` fields returned by `load_files`.
        """
        archive_path = keras.utils.get_file(
            fname="aclImdb.tar.gz",
            origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",  # noqa: E501
            extract=True,
        )
        data_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
        labels = ["pos", "neg"]

        def read_split(split_name, shuffle):
            # Load one sub-folder and convert it to (texts, targets) arrays.
            bunch = load_files(
                os.path.join(data_root, split_name),
                shuffle=shuffle,
                categories=labels,
            )
            return np.array(bunch.data), np.array(bunch.target)

        train_pair = read_split("train", True)
        test_pair = read_split("test", False)
        return train_pair, test_pair


================================================
FILE: benchmark/performance.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import keras
import numpy as np
from keras.datasets import cifar10
from keras.datasets import mnist
from sklearn.datasets import load_files

import autokeras as ak


def imdb_raw(num_instances=100):
    """Download the aclImdb archive and return raw text/label arrays.

    The archive is fetched with `keras.utils.get_file(..., extract=True)` and
    the data is read from an `aclImdb` directory located next to the path that
    call returns, restricted to the "pos" and "neg" categories. Only the train
    split is shuffled.

    Args:
        num_instances: If not None, every returned array is truncated to its
            first `num_instances` entries.

    Returns:
        `((x_train, y_train), (x_test, y_test))` as numpy arrays.
    """
    archive_path = keras.utils.get_file(
        fname="aclImdb.tar.gz",
        origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        extract=True,
    )
    data_root = os.path.join(os.path.dirname(archive_path), "aclImdb")
    labels = ["pos", "neg"]

    train_bunch = load_files(
        os.path.join(data_root, "train"), shuffle=True, categories=labels
    )
    test_bunch = load_files(
        os.path.join(data_root, "test"), shuffle=False, categories=labels
    )

    arrays = [
        np.array(train_bunch.data),
        np.array(train_bunch.target),
        np.array(test_bunch.data),
        np.array(test_bunch.target),
    ]
    if num_instances is not None:
        arrays = [array[:num_instances] for array in arrays]

    x_train, y_train, x_test, y_test = arrays
    return (x_train, y_train), (x_test, y_test)


def test_mnist_accuracy_over_98(tmp_path):
    """Fit an ImageClassifier on MNIST and require accuracy of at least 0.98.

    Runs a single trial for 10 epochs; the value at index 1 of the
    `evaluate` result is the one compared against the threshold.
    """
    train_split, test_split = mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.ImageClassifier(max_trials=1, directory=tmp_path)
    classifier.fit(x_train, y_train, epochs=10)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.98


def test_cifar10_accuracy_over_93(tmp_path):
    """Fit an ImageClassifier on CIFAR10 and require accuracy of at least 0.93.

    Runs up to 3 trials for 5 epochs; the value at index 1 of the
    `evaluate` result is the one compared against the threshold.
    """
    train_split, test_split = cifar10.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.ImageClassifier(max_trials=3, directory=tmp_path)
    classifier.fit(x_train, y_train, epochs=5)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.93


def test_imdb_accuracy_over_92(tmp_path):
    """Fit a TextClassifier on the full IMDB data and require accuracy >= 0.92.

    Uses `imdb_raw(num_instances=None)` so no truncation is applied, then
    runs up to 3 trials with `batch_size=6` for 1 epoch.
    """
    train_split, test_split = imdb_raw(num_instances=None)
    x_train, y_train = train_split
    x_test, y_test = test_split

    classifier = ak.TextClassifier(max_trials=3, directory=tmp_path)
    classifier.fit(x_train, y_train, batch_size=6, epochs=1)

    scores = classifier.evaluate(x_test, y_test)
    assert scores[1] >= 0.92


def test_titaninc_accuracy_over_77(tmp_path):
    """Fit a StructuredDataClassifier on Titanic and require accuracy >= 0.77.

    The train and eval CSV files are fetched with `keras.utils.get_file` and
    passed to `fit`/`evaluate` together with the "survived" column name.
    """
    label_column = "survived"
    train_csv = keras.utils.get_file(
        "train.csv",
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
    )
    eval_csv = keras.utils.get_file(
        "eval.csv",
        "https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
    )

    classifier = ak.StructuredDataClassifier(max_trials=10, directory=tmp_path)
    classifier.fit(train_csv, label_column)

    scores = classifier.evaluate(eval_csv, label_column)
    assert scores[1] >= 0.77


================================================
FILE: benchmark/run.py
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import statistics
import sys

from benchmark import experiments as exp_module


def generate_report(experiments):
    """Run every experiment ten times and render the results as CSV text.

    Args:
        experiments: Iterable of objects exposing a `name` attribute and a
            `run(repeat_times=...)` method that returns a pair
            `(total_times, metric_values)` of numeric sequences.

    Returns:
        A string with one header line followed by one line per experiment,
        joined by newlines (no trailing newline). Each data line holds the
        experiment name, the mean of the times, the mean of the metric
        values and their sample standard deviation.
    """
    header = (
        "dataset_name",
        "average_time",
        "metrics_average",
        "metrics_standard_deviation",
    )
    lines = [",".join(header)]
    for exp in experiments:
        durations, scores = exp.run(repeat_times=10)
        stats = [
            statistics.mean(durations),
            statistics.mean(scores),
            statistics.stdev(scores),
        ]
        lines.append(",".join([exp.name, *map(str, stats)]))
    return "\n".join(lines)


def main(argv):
    """Generate the benchmark report for one task and write it to a file.

    Args:
        argv: Command-line style argument list. `argv[1]` is the task name
            handed to `exp_module.get_experiments` and `argv[2]` is the path
            of the report file to write.

    Raises:
        IndexError: If fewer than three entries are supplied.
    """
    # Read from the `argv` parameter rather than the global `sys.argv`, so the
    # function honours the arguments it is actually called with.
    task, path = argv[1], argv[2]
    report = generate_report(exp_module.get_experiments(task))
    with open(path, "w") as file:
        file.write(report)


if __name__ == "__main__":
    main(sys.argv)


================================================
FILE: codecov.yml
================================================
# Coverage status settings: both the `project` and the `patch` status use a
# `default` entry with a 100% target.
coverage:
  status:
    project:
      default:
        # Target for the `project` status.
        target: 100%
    patch:
      default:
        # Target for the `patch` status.
        target: 100%


================================================
FILE: docker/Dockerfile
================================================
# The base image tag is a build argument so that
# `docker build --build-arg TF_VERSION=...` selects the TensorFlow release.
# The default keeps the previously hard-coded tag.
ARG TF_VERSION=2.3.0
FROM tensorflow/tensorflow:${TF_VERSION}

# Copy the build context and install it in editable mode, so the context must
# contain the installable package (presumably the repository root; confirm).
WORKDIR /opt/autokeras
COPY . .
RUN python -m pip install --no-cache-dir --editable .

# Default working directory for containers started from this image.
WORKDIR /work


================================================
FILE: docker/Makefile
================================================
# These targets are commands, not files; declaring them phony prevents a stray
# file named e.g. `build` or `test` from making a target look up to date.
.PHONY: help build bash ipython notebook test

help:
	@cat Makefile

DATA?="${HOME}/Data"
GPUS?=all
DOCKER_FILE?=Dockerfile
DOCKER=docker
TF_VERSION=2.3.0
TEST=tests/
# Parent of the current directory; mounted into the containers and used as the
# build context.
SRC?=$(shell dirname `pwd`)

# The build context is $(SRC), not this directory: the default Dockerfile
# copies the whole context and runs `pip install --editable .` on it, so the
# context has to be the directory that holds the installable package.
build:
	$(DOCKER) build -t autokeras --build-arg TF_VERSION=$(TF_VERSION) -f $(DOCKER_FILE) $(SRC)

# The run targets previously carried a bare `--env` with no VAR[=value]. That
# option consumed the next word (the image name `autokeras`) as its argument,
# so the following word was taken as the image. The dangling option is removed.
bash: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras bash

ipython: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras ipython

# Runs the image's default command on the host network.
# NOTE(review): this only starts a notebook server if the image's default
# command does so - confirm against the base image in use.
notebook: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data --net=host autokeras

test: build
	$(DOCKER) run --gpus $(GPUS) -it -v $(SRC):/src/workspace -v $(DATA):/data autokeras py.test $(TEST)


================================================
FILE: docker/README.md
================================================
# Using Autokeras via Docker

This directory contains a `Dockerfile` that makes it easy to get up and running
with Autokeras via [Docker](http://www.docker.com/).

## Installing Docker

General installation instructions are
[on the Docker site](https://docs.docker.com/installation/), but we give some
quick links here:

* [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox)
* [ubuntu](https://docs.docker.com/installation/ubuntulinux/)

## Running the container

We use a `Makefile` to wrap the Docker commands in simple `make` targets.

Build the container and start a Jupyter Notebook

    $ make notebook

Build the container and start an iPython shell

    $ make ipython

Build the container and start a bash

    $ make bash

For GPU support install NVIDIA drivers (ideally latest) and
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using

    $ make notebook GPUS=all # or [ipython, bash]

Mount a volume for external data sets

    $ make notebook DATA=~/mydata # or [ipython, bash]

Prints all make tasks

    $ make help

Note: If you have a problem running nvidia-docker, you may try the old approach
shown below, although it is not recommended. If you find a bug in nvidia-docker,
please report it to that project and keep using nvidia-docker as described above.

    $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
    $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
    $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu


================================================
FILE: docker/devel.Dockerfile
================================================
# NOTE(review): Ubuntu 18.04 and Python 3.6 are both end-of-life; confirm that
# the current AutoKeras sources still install on this interpreter.
FROM ubuntu:18.04

# WORKDIR creates the directory when it is missing, so no separate mkdir.
WORKDIR /app

# Packages are listed one per line in alphabetical order. The apt lists are
# removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        gcc \
        git \
        python3-distutils \
        python3.6 \
        python3.6-dev \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# The generic https://bootstrap.pypa.io/get-pip.py refuses to run on Python
# 3.6, so use the version-specific script. `-f` makes curl fail on HTTP errors
# instead of saving an error page as get-pip.py.
RUN curl -fsSLO https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
    python3.6 get-pip.py && \
    rm get-pip.py

# Clone, install and clean up in a single layer: deleting the checkout in a
# later RUN would not remove it from the image.
RUN git clone https://github.com/keras-team/autokeras.git && \
    python3.6 -m pip install --no-cache-dir -U keras && \
    python3.6 -m pip install --no-cache-dir -U ./autokeras && \
    rm -rf autokeras

# Exec form, so bash runs directly rather than under `/bin/sh -c`.
CMD ["/bin/bash"]


================================================
FILE: docker/pre-commit.Dockerfile
================================================
FROM python:3.7

# Lint/format tools. --no-cache-dir keeps pip's download cache out of the
# image layer.
RUN pip install --no-cache-dir flake8 black isort

# The default command runs docker/pre_commit.py relative to /autokeras, so the
# repository is expected to be available there at run time (presumably via a
# bind mount; confirm against how the image is started).
WORKDIR /autokeras
CMD ["python", "docker/pre_commit.py"]


================================================
FILE: docker/pre_commit.py
================================================
from subprocess import CalledProcessError
from subprocess import check_call


def check_bash_call(string):
    """Run `string` through `bash -c`.

    Raises:
        CalledProcessError: If the command exits with a non-zero status.
    """
    command = ["bash", "-c", string]
    check_call(command)


def _run_format_and_flake8():
    """Run the lint script and fall back to the format script on failure.

    When `shell/lint.sh` succeeds, a "no formatting needed" message is
    printed and the function returns. When it fails, `shell/format.sh` is
    run, the user is told to re-add and re-commit, and the process exits
    with status 1.

    Raises:
        CalledProcessError: If `shell/format.sh` itself fails.
    """
    try:
        check_bash_call("sh shell/lint.sh")
    except CalledProcessError:
        # Lint failed: reformat the files; the commit has to be redone.
        check_bash_call("sh shell/format.sh")
    else:
        print("No formatting needed.")
        return

    print("Some files have changed.")
    print("Please do git add and git commit again")
    exit(1)


def run_format_and_flake8():
    """Run the pre-commit checks, mapping a failed command to an exit code.

    If a `CalledProcessError` escapes `_run_format_and_flake8`, its return
    code is printed and used as this process's exit status.
    """
    try:
        _run_format_and_flake8()
    except CalledProcessError as error:
        code = error.returncode
        print("Pre-commit returned exit code", code)
        exit(code)


if __name__ == "__main__":
    run_format_and_flake8()


================================================
FILE: docs/README.md
================================================
# AutoKeras Documentation

The source for AutoKeras documentation is in this directory.
Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org).

## Building the documentation

- Install dependencies: `pip install -r docs/requirements.txt`
- `pip install -e .` to make sure that Python will import your modified version of AutoKeras.
- From the root directory, `cd` into the `docs/` folder and run:
    - `python autogen.py`
    - `mkdocs serve`    # Starts a local webserver:  [localhost:8000](http://localhost:8000)
    - `mkdocs build`    # Builds a static site in `site/` directory

## Generate contributors list
- Prerequisites:
    - Install Pillow: `pip install Pillow`
- Generate:
    - Run: `sh shell/contributors.sh`
    - The generated file is: `docs/templates/img/contributors.svg`

================================================
FILE: docs/autogen.py
================================================
import os
import pathlib
import shutil

import keras_autodoc
import tutobooks

# Maps each markdown page name to the dotted names documented on that page.
# The whole mapping is passed as the first argument to
# `keras_autodoc.DocumentationGenerator` in `generate()`.
PAGES = {
    "image_classifier.md": [
        "autokeras.ImageClassifier",
        "autokeras.ImageClassifier.fit",
        "autokeras.ImageClassifier.predict",
        "autokeras.ImageClassifier.evaluate",
        "autokeras.ImageClassifier.export_model",
    ],
    "image_regressor.md": [
        "autokeras.ImageRegressor",
        "autokeras.ImageRegressor.fit",
        "autokeras.ImageRegressor.predict",
        "autokeras.ImageRegressor.evaluate",
        "autokeras.ImageRegressor.export_model",
    ],
    "text_classifier.md": [
        "autokeras.TextClassifier",
        "autokeras.TextClassifier.fit",
        "autokeras.TextClassifier.predict",
        "autokeras.TextClassifier.evaluate",
        "autokeras.TextClassifier.export_model",
    ],
    "text_regressor.md": [
        "autokeras.TextRegressor",
        "autokeras.TextRegressor.fit",
        "autokeras.TextRegressor.predict",
        "autokeras.TextRegressor.evaluate",
        "autokeras.TextRegressor.export_model",
    ],
    "structured_data_classifier.md": [
        "autokeras.StructuredDataClassifier",
        "autokeras.StructuredDataClassifier.fit",
        "autokeras.StructuredDataClassifier.predict",
        "autokeras.StructuredDataClassifier.evaluate",
        "autokeras.StructuredDataClassifier.export_model",
    ],
    "structured_data_regressor.md": [
        "autokeras.StructuredDataRegressor",
        "autokeras.StructuredDataRegressor.fit",
        "autokeras.StructuredDataRegressor.predict",
        "autokeras.StructuredDataRegressor.evaluate",
        "autokeras.StructuredDataRegressor.export_model",
    ],
    "auto_model.md": [
        "autokeras.AutoModel",
        "autokeras.AutoModel.fit",
        "autokeras.AutoModel.predict",
        "autokeras.AutoModel.evaluate",
        "autokeras.AutoModel.export_model",
    ],
    "base.md": [
        "autokeras.Node",
        "autokeras.Block",
        "autokeras.Block.build",
        "autokeras.Head",
    ],
    "node.md": [
        "autokeras.ImageInput",
        "autokeras.Input",
        "autokeras.TextInput",
        "autokeras.StructuredDataInput",
    ],
    "block.md": [
        "autokeras.ConvBlock",
        "autokeras.DenseBlock",
        "autokeras.Embedding",
        "autokeras.Merge",
        "autokeras.ResNetBlock",
        "autokeras.RNNBlock",
        "autokeras.SpatialReduction",
        "autokeras.TemporalReduction",
        "autokeras.XceptionBlock",
        "autokeras.StructuredDataBlock",
        "autokeras.ImageBlock",
        "autokeras.TextBlock",
        "autokeras.ImageAugmentation",
        "autokeras.Normalization",
        "autokeras.ClassificationHead",
        "autokeras.RegressionHead",
    ],
}


# Dotted Keras names passed to `keras_autodoc.DocumentationGenerator` as
# `extra_aliases` in `generate()`.
aliases_needed = [
    "keras.callbacks.Callback",
    "keras.losses.Loss",
    "keras.metrics.Metric",
]


# Base URL of the documentation site.
# NOTE(review): not referenced by the code visible in this file - confirm use.
ROOT = "http://autokeras.com/"

# Directory two levels above this file, i.e. the parent of the directory that
# contains this script.
autokeras_dir = pathlib.Path(__file__).resolve().parents[1]


def py_to_nb_md(dest_dir):
    """Convert every `.py` file in `py/` into a notebook and a markdown page.

    For each `py/<name>.py`, `tutobooks.py_to_md` is called to produce
    `ipynb/<name>.ipynb` and `<dest_dir>/tutorial/<name>.md`. The markdown
    file is then rewritten with a line of "View in Colab" / "GitHub source"
    links prepended. All input paths are relative to the current working
    directory.

    Args:
        dest_dir: Directory under which the `tutorial/` markdown files live.
    """
    source_dir = "py"
    repo_docs = "keras-team/autokeras/blob/master/docs/"

    for entry in os.listdir("py/"):
        stem, suffix = os.path.splitext(entry)
        if suffix != ".py":
            continue

        script_path = os.path.join(source_dir, entry)
        notebook_path = os.path.join("ipynb", stem + ".ipynb")
        page_path = os.path.join(dest_dir, "tutorial", stem + ".md")

        tutobooks.py_to_md(
            script_path, notebook_path, page_path, "templates/img"
        )

        colab_link = (
            ":material-link: "
            "[**View in Colab**](https://colab.research.google.com/github/"
            + repo_docs
            + "ipynb/"
            + stem
            + ".ipynb"
            + ")   &nbsp; &nbsp;"
        )
        github_link = (
            ":octicons-mark-github-16: "
            "[**GitHub source**](https://github.com/"
            + repo_docs
            + "py/"
            + stem
            + ".py)"
        )
        # The link line is followed by one blank line before the page body.
        banner = colab_link + github_link + "\n" + "\n"

        with open(page_path, "r") as page_file:
            page_text = banner + page_file.read()

        with open(page_path, "w") as page_file:
            page_file.write(page_text)


def generate(dest_dir):
    """Generate the documentation sources into `dest_dir`.

    Steps, in order:
      1. Run `keras_autodoc.DocumentationGenerator` over `PAGES`.
      2. Build `index.md` from the `index.md` template, replacing
         `{{autogenerated}}` with the README text starting at its first
         "##" occurrence.
      3. Copy `.github/CONTRIBUTING.md` to `contributing.md`.
      4. Convert the tutorial scripts with `py_to_nb_md`.

    Args:
        dest_dir: `pathlib.Path` of the output directory.
    """
    template_dir = autokeras_dir / "docs" / "templates"
    doc_generator = keras_autodoc.DocumentationGenerator(
        PAGES,
        "https://github.com/keras-team/autokeras/blob/master",
        template_dir,
        autokeras_dir / "examples",
        extra_aliases=aliases_needed,
    )
    doc_generator.generate(dest_dir)

    # Read with an explicit encoding: the result is written back as UTF-8, so
    # decoding the inputs with a different locale default would corrupt or
    # reject any non-ASCII characters.
    readme = (autokeras_dir / "README.md").read_text(encoding="utf-8")
    index = (template_dir / "index.md").read_text(encoding="utf-8")
    index = index.replace("{{autogenerated}}", readme[readme.find("##") :])
    (dest_dir / "index.md").write_text(index, encoding="utf-8")

    shutil.copyfile(
        autokeras_dir / ".github" / "CONTRIBUTING.md",
        dest_dir / "contributing.md",
    )

    py_to_nb_md(dest_dir)


if __name__ == "__main__":
    generate(autokeras_dir / "docs" / "sources")


================================================
FILE: docs/ipynb/customized.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import keras\n",
    "import numpy as np\n",
    "import tree\n",
    "from keras.datasets import mnist\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "In this tutorial, we show how to customize your search space with\n",
    "[AutoModel](/auto_model/#automodel-class) and how to implement your own block\n",
    "as search space.  This API is mainly for advanced users who already know what\n",
    "their model should look like.\n",
    "\n",
    "## Customized Search Space\n",
    "First, let us see how we can build the following neural network using the\n",
    "building blocks in AutoKeras.\n",
    "\n",
    "<div class=\"mermaid\">\n",
    "graph LR\n",
    "    id1(ImageInput) --> id2(Normalization)\n",
    "    id2 --> id3(Image Augmentation)\n",
    "    id3 --> id4(Convolutional)\n",
    "    id3 --> id5(ResNet V2)\n",
    "    id4 --> id6(Merge)\n",
    "    id5 --> id6\n",
    "    id6 --> id7(Classification Head)\n",
    "</div>\n",
    "\n",
    "We can use the [AutoModel](/auto_model/#automodel-class) API in\n",
    "AutoKeras to implement it as follows.\n",
    "The usage is the same as the [Keras functional\n",
    "API](https://keras.io/api/models/model/#with-the-functional-api).\n",
    "Since this is just a demo, we use small values for `max_trials` and `epochs`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.ImageInput()\n",
    "output_node = ak.Normalization()(input_node)\n",
    "output_node1 = ak.ConvBlock()(output_node)\n",
    "output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n",
    "output_node = ak.Merge()([output_node1, output_node2])\n",
    "output_node = ak.ClassificationHead()(output_node)\n",
    "\n",
    "auto_model = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "While building the model, the blocks used need to follow this topology:\n",
    "`Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation`\n",
    "are `Preprocessor`s.\n",
    "`ClassificationHead` is `Head`. The rest are `Block`s.\n",
    "\n",
    "In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version\n",
    "of ResNet to use.  There are many other arguments to specify for each building\n",
    "block.  For most of the arguments, if not specified, they would be tuned\n",
    "automatically.  Please refer to the documentation links at the bottom of the\n",
    "page for more details.\n",
    "\n",
    "Then, we prepare some data to run the model.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "print(x_train.shape)  # (60000, 28, 28)\n",
    "print(y_train.shape)  # (60000,)\n",
    "print(y_train[:3])  # array([7, 2, 1], dtype=uint8)\n",
    "\n",
    "# Feed the AutoModel with training data.\n",
    "auto_model.fit(x_train[:100], y_train[:100], epochs=1)\n",
    "# Predict with the best model.\n",
    "predicted_y = auto_model.predict(x_test)\n",
    "# Evaluate the best model with testing data.\n",
    "print(auto_model.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "For multiple input nodes and multiple heads search space, you can refer to\n",
    "[this section](/tutorial/multi/#customized-search-space).\n",
    "\n",
    "## Validation Data\n",
    "If you would like to provide your own validation data or change the ratio of\n",
    "the validation data, please refer to the Validation Data section of the\n",
    "tutorials of [Image\n",
    "Classification](/tutorial/image_classification/#validation-data), [Text\n",
    "Classification](/tutorial/text_classification/#validation-data), [Structured\n",
    "Data\n",
    "Classification](/tutorial/structured_data_classification/#validation-data),\n",
    "[Multi-task and Multiple Validation](/tutorial/multi/#validation-data).\n",
    "\n",
    "## Implement New Block\n",
    "\n",
    "You can extend the [Block](/base/#block-class)\n",
    "class to implement your own building blocks and use it with\n",
    "[AutoModel](/auto_model/#automodel-class).\n",
    "\n",
    "The first step is to learn how to write a build function for\n",
    "[KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics).  You\n",
    "need to override the [build function](/base/#build-method) of the block.  The\n",
    "following example shows how to implement a single Dense layer block whose\n",
    "number of neurons is tunable.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "class SingleDenseLayerBlock(ak.Block):\n",
    "    def build(self, hp, inputs=None):\n",
    "        # Get the input_node from inputs.\n",
    "        input_node = tree.flatten(inputs)[0]\n",
    "        layer = keras.layers.Dense(\n",
    "            hp.Int(\"num_units\", min_value=32, max_value=512, step=32)\n",
    "        )\n",
    "        output_node = layer(input_node)\n",
    "        return output_node"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can connect it with other blocks and build it into an\n",
    "[AutoModel](/auto_model/#automodel-class).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Build the AutoModel\n",
    "input_node = ak.Input()\n",
    "output_node = SingleDenseLayerBlock()(input_node)\n",
    "output_node = ak.RegressionHead()(output_node)\n",
    "auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1)\n",
    "# Prepare Data\n",
    "num_instances = 100\n",
    "x_train = np.random.rand(num_instances, 20).astype(np.float32)\n",
    "y_train = np.random.rand(num_instances, 1).astype(np.float32)\n",
    "x_test = np.random.rand(num_instances, 20).astype(np.float32)\n",
    "y_test = np.random.rand(num_instances, 1).astype(np.float32)\n",
    "# Train the model\n",
    "auto_model.fit(x_train, y_train, epochs=1)\n",
    "print(auto_model.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "\n",
    "[AutoModel](/auto_model/#automodel-class)\n",
    "\n",
    "**Nodes**:\n",
    "[ImageInput](/node/#imageinput-class),\n",
    "[Input](/node/#input-class),\n",
    "[TextInput](/node/#textinput-class),\n",
    "[StructuredDataInput](/node/#structureddatainput-class).\n",
    "\n",
    "**Preprocessors**:\n",
    "[FeatureEngineering](/block/#featureengineering-class),\n",
    "[ImageAugmentation](/block/#imageaugmentation-class),\n",
    "[LightGBM](/block/#lightgbm-class),\n",
    "[Normalization](/block/#normalization-class).\n",
    "\n",
    "**Blocks**:\n",
    "[ConvBlock](/block/#convblock-class),\n",
    "[DenseBlock](/block/#denseblock-class),\n",
    "[Embedding](/block/#embedding-class),\n",
    "[Merge](/block/#merge-class),\n",
    "[ResNetBlock](/block/#resnetblock-class),\n",
    "[RNNBlock](/block/#rnnblock-class),\n",
    "[SpatialReduction](/block/#spatialreduction-class),\n",
    "[TemporalReduction](/block/#temporalreduction-class),\n",
    "[XceptionBlock](/block/#xceptionblock-class),\n",
    "[ImageBlock](/block/#imageblock-class),\n",
    "[TextBlock](/block/#textblock-class),\n",
    "[StructuredDataBlock](/block/#structureddatablock-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "customized",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/export.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from keras.datasets import mnist\n",
    "from keras.models import load_model\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can easily export the best model found by AutoKeras as a Keras\n",
    "Model.\n",
    "\n",
    "The following example uses [ImageClassifier](/image_classifier) as an example.\n",
    "All the tasks and the [AutoModel](/auto_model/#automodel-class) have this\n",
    "[export_model](/auto_model/#export_model-method) function.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "\n",
    "# Initialize the image classifier.\n",
    "clf = ak.ImageClassifier(\n",
    "    overwrite=True, max_trials=1\n",
    ")  # Try only 1 model.(Increase accordingly)\n",
    "# Feed the image classifier with training data.\n",
    "clf.fit(x_train, y_train, epochs=1)  # Change no of epochs to improve the model\n",
    "# Export as a Keras Model.\n",
    "model = clf.export_model()\n",
    "\n",
    "print(type(model))  # <class 'keras.engine.training.Model'>\n",
    "\n",
    "model.save(\"model_autokeras.keras\")\n",
    "\n",
    "\n",
    "loaded_model = load_model(\n",
    "    \"model_autokeras.keras\", custom_objects=ak.CUSTOM_OBJECTS\n",
    ")\n",
    "\n",
    "predicted_y = loaded_model.predict(np.expand_dims(x_test, -1))\n",
    "print(predicted_y)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "export",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/image_classification.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "from keras.datasets import mnist\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the MNIST dataset as an\n",
    "example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "x_train = x_train[:100]\n",
    "y_train = y_train[:100]\n",
    "x_test = x_test[:100]\n",
    "y_test = y_test[:100]\n",
    "print(x_train.shape)  # (100, 28, 28)\n",
    "print(y_train.shape)  # (100,)\n",
    "print(y_train[:3])  # array([7, 2, 1], dtype=uint8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the ImageClassifier.\n",
    "It is recommended to have more trials for more complicated datasets.\n",
    "This is just a quick demo of MNIST, so we set max_trials to 1.\n",
    "For the same reason, we set epochs to 1.\n",
    "You can also leave the epochs unspecified for an adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the image classifier.\n",
    "clf = ak.ImageClassifier(overwrite=True, max_trials=1)\n",
    "# Feed the image classifier with training data.\n",
    "clf.fit(x_train, y_train, epochs=1)\n",
    "\n",
    "\n",
    "# Predict with the best model.\n",
    "predicted_y = clf.predict(x_test)\n",
    "print(predicted_y)\n",
    "\n",
    "\n",
    "# Evaluate the best model with testing data.\n",
    "print(clf.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data. As\n",
    "shown in the example below, you can use validation_split to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set instead of splitting it from the\n",
    "training data with validation_data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 50000\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    epochs=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Customized Search Space\n",
    "For advanced users, you may customize your search space by using AutoModel\n",
    "instead of ImageClassifier. You can configure the ImageBlock for some\n",
    "high-level configurations, e.g., block_type for the type of neural network to\n",
    "search, normalize for whether to do data normalization, augment for whether to\n",
    "do data augmentation. You can also omit these arguments, which would\n",
    "leave the different choices to be tuned automatically. See the following\n",
    "example for detail.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.ImageInput()\n",
    "output_node = ak.ImageBlock(\n",
    "    # Only search ResNet architectures.\n",
    "    block_type=\"resnet\",\n",
    "    # Normalize the dataset.\n",
    "    normalize=True,\n",
    "    # Do not do data augmentation.\n",
    "    augment=False,\n",
    ")(input_node)\n",
    "output_node = ak.ClassificationHead()(output_node)\n",
    "clf = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "clf.fit(x_train, y_train, epochs=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The usage of AutoModel is similar to the functional API of Keras. Basically, you\n",
    "are building a graph, whose edges are blocks and the nodes are intermediate\n",
    "outputs of blocks. To add an edge from input_node to output_node, use\n",
    "output_node = ak.[some_block]([block_args])(input_node).\n",
    "\n",
    "You can also use more fine-grained blocks to customize the search space\n",
    "even further. See the following example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.ImageInput()\n",
    "output_node = ak.Normalization()(input_node)\n",
    "output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n",
    "output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n",
    "output_node = ak.ClassificationHead()(output_node)\n",
    "clf = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "clf.fit(x_train, y_train, epochs=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[ImageClassifier](/image_classifier),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[ImageBlock](/block/#imageblock-class),\n",
    "[Normalization](/block/#normalization-class),\n",
    "[ImageAugmentation](/block/#image-augmentation-class),\n",
    "[ResNetBlock](/block/#resnetblock-class),\n",
    "[ImageInput](/node/#imageinput-class),\n",
    "[ClassificationHead](/block/#classificationhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "image_classification",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/image_regression.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "from keras.datasets import mnist\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "To make this tutorial easy to follow, we just treat MNIST dataset as a\n",
    "regression dataset. It means we will treat prediction targets of MNIST dataset,\n",
    "which are integers ranging from 0 to 9 as numerical values, so that they can be\n",
    "directly used as the regression targets.\n",
    "\n",
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the MNIST dataset as an\n",
    "example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "x_train = x_train[:100]\n",
    "y_train = y_train[:100]\n",
    "x_test = x_test[:100]\n",
    "y_test = y_test[:100]\n",
    "print(x_train.shape)  # (100, 28, 28)\n",
    "print(y_train.shape)  # (100,)\n",
    "print(y_train[:3])  # array([7, 2, 1], dtype=uint8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the ImageRegressor.  It is recommended to have more\n",
    "trials for more complicated datasets.  This is just a quick demo of MNIST, so\n",
    "we set max_trials to 1.  For the same reason, we set epochs to 1.  You can also\n",
    "leave the epochs unspecified for an adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the image regressor.\n",
    "reg = ak.ImageRegressor(overwrite=True, max_trials=1)\n",
    "# Feed the image regressor with training data.\n",
    "reg.fit(x_train, y_train, epochs=1)\n",
    "\n",
    "\n",
    "# Predict with the best model.\n",
    "predicted_y = reg.predict(x_test)\n",
    "print(predicted_y)\n",
    "\n",
    "\n",
    "# Evaluate the best model with testing data.\n",
    "print(reg.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data. As\n",
    "shown in the example below, you can use validation_split to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set instead of splitting it from the\n",
    "training data with validation_data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 50000\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    epochs=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Customized Search Space\n",
    "For advanced users, you may customize your search space by using AutoModel\n",
    "instead of ImageRegressor. You can configure the ImageBlock for some high-level\n",
    "configurations, e.g., block_type for the type of neural network to search,\n",
    "normalize for whether to do data normalization, augment for whether to do data\n",
    "augmentation. You can also omit these arguments, which would leave\n",
    "the different choices to be tuned automatically. See the following example for\n",
    "detail.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.ImageInput()\n",
    "output_node = ak.ImageBlock(\n",
    "    # Only search ResNet architectures.\n",
    "    block_type=\"resnet\",\n",
    "    # Normalize the dataset.\n",
    "    normalize=False,\n",
    "    # Do not do data augmentation.\n",
    "    augment=False,\n",
    ")(input_node)\n",
    "output_node = ak.RegressionHead()(output_node)\n",
    "reg = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "reg.fit(x_train, y_train, epochs=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The usage of AutoModel is similar to the functional API of Keras. Basically,\n",
    "you are building a graph, whose edges are blocks and the nodes are intermediate\n",
    "outputs of blocks. To add an edge from input_node to output_node, use\n",
    "output_node = ak.[some_block]([block_args])(input_node).\n",
    "\n",
    "You can also use more fine-grained blocks to customize the search space\n",
    "even further. See the following example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.ImageInput()\n",
    "output_node = ak.Normalization()(input_node)\n",
    "output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)\n",
    "output_node = ak.ResNetBlock(version=\"v2\")(output_node)\n",
    "output_node = ak.RegressionHead()(output_node)\n",
    "reg = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "reg.fit(x_train, y_train, epochs=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[ImageRegressor](/image_regressor),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[ImageBlock](/block/#imageblock-class),\n",
    "[Normalization](/block/#normalization-class),\n",
    "[ImageAugmentation](/block/#image-augmentation-class),\n",
    "[ResNetBlock](/block/#resnetblock-class),\n",
    "[ImageInput](/node/#imageinput-class),\n",
    "[RegressionHead](/block/#regressionhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "image_regression",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/multi.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "In this tutorial we are making use of the\n",
    "[AutoModel](/auto_model/#automodel-class)\n",
    " API to show how to handle multi-modal data and multi-task.\n",
    "\n",
    "## What is multi-modal?\n",
    "\n",
    "Multi-modal data means each data instance has multiple forms of information.\n",
    "For example, a photo can be saved as an image. Besides the image, it may also\n",
    "have when and where it was taken as its attributes, which can be represented as\n",
    "numerical data.\n",
    "\n",
    "## What is multi-task?\n",
    "\n",
    "Multi-task here means that we want to predict multiple targets with the same\n",
    "input features. For example, we not only want to classify an image according to\n",
    "its content, but we also want to regress its quality as a float number between\n",
    "0 and 1.\n",
    "\n",
    "The following diagram shows an example of a multi-modal and multi-task neural\n",
    "network model.\n",
    "\n",
    "<div class=\"mermaid\">\n",
    "graph TD\n",
    "    id1(ImageInput) --> id3(Some Neural Network Model)\n",
    "    id2(Input) --> id3\n",
    "    id3 --> id4(ClassificationHead)\n",
    "    id3 --> id5(RegressionHead)\n",
    "</div>\n",
    "\n",
    "It has two inputs: the images and the numerical input data. Each image is\n",
    "associated with a set of attributes in the numerical input data. From these\n",
    "data, we are trying to predict the classification label and the regression value\n",
    "at the same time.\n",
    "\n",
    "## Data Preparation\n",
    "\n",
    "To illustrate our idea, we generate some random image and numerical data as\n",
    "the multi-modal data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "num_instances = 10\n",
    "# Generate image data.\n",
    "image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n",
    "# Generate numerical data.\n",
    "numerical_data = np.random.rand(num_instances, 20).astype(np.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "We also generate some multi-task targets for classification and regression.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Generate regression targets.\n",
    "regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n",
    "# Generate classification labels of five classes.\n",
    "classification_target = np.random.randint(5, size=num_instances)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Build and Train the Model\n",
    "Then we initialize the multi-modal and multi-task model with\n",
    "[AutoModel](/auto_model/#automodel-class).\n",
    "Since this is just a demo, we use small values for `max_trials` and `epochs`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the multi with multiple inputs and outputs.\n",
    "model = ak.AutoModel(\n",
    "    inputs=[ak.ImageInput(), ak.Input()],\n",
    "    outputs=[\n",
    "        ak.RegressionHead(metrics=[\"mae\"]),\n",
    "        ak.ClassificationHead(\n",
    "            loss=\"categorical_crossentropy\", metrics=[\"accuracy\"]\n",
    "        ),\n",
    "    ],\n",
    "    overwrite=True,\n",
    "    max_trials=2,\n",
    ")\n",
    "# Fit the model with prepared data.\n",
    "model.fit(\n",
    "    [image_data, numerical_data],\n",
    "    [regression_target, classification_target],\n",
    "    epochs=1,\n",
    "    batch_size=3,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data.\n",
    "As shown in the example below, you can use `validation_split` to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "model.fit(\n",
    "    [image_data, numerical_data],\n",
    "    [regression_target, classification_target],\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=1,\n",
    "    batch_size=3,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set\n",
    "instead of splitting it from the training data with `validation_data`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 5\n",
    "\n",
    "image_val = image_data[split:]\n",
    "numerical_val = numerical_data[split:]\n",
    "regression_val = regression_target[split:]\n",
    "classification_val = classification_target[split:]\n",
    "\n",
    "image_data = image_data[:split]\n",
    "numerical_data = numerical_data[:split]\n",
    "regression_target = regression_target[:split]\n",
    "classification_target = classification_target[:split]\n",
    "\n",
    "model.fit(\n",
    "    [image_data, numerical_data],\n",
    "    [regression_target, classification_target],\n",
    "    # Use your own validation set.\n",
    "    validation_data=(\n",
    "        [image_val, numerical_val],\n",
    "        [regression_val, classification_val],\n",
    "    ),\n",
    "    epochs=1,\n",
    "    batch_size=3,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Customized Search Space\n",
    "You can customize your search space.\n",
    "The following figure shows the search space we want to define.\n",
    "\n",
    "<div class=\"mermaid\">\n",
    "graph LR\n",
    "    id1(ImageInput) --> id2(Normalization)\n",
    "    id2 --> id3(Image Augmentation)\n",
    "    id3 --> id4(Convolutional)\n",
    "    id3 --> id5(ResNet V2)\n",
    "    id4 --> id6(Merge)\n",
    "    id5 --> id6\n",
    "    id7(Input) --> id9(DenseBlock)\n",
    "    id6 --> id10(Merge)\n",
    "    id9 --> id10\n",
    "    id10 --> id11(Classification Head)\n",
    "    id10 --> id12(Regression Head)\n",
    "</div>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node1 = ak.ImageInput()\n",
    "output_node = ak.Normalization()(input_node1)\n",
    "output_node = ak.ImageAugmentation()(output_node)\n",
    "output_node1 = ak.ConvBlock()(output_node)\n",
    "output_node2 = ak.ResNetBlock(version=\"v2\")(output_node)\n",
    "output_node1 = ak.Merge()([output_node1, output_node2])\n",
    "\n",
    "input_node2 = ak.Input()\n",
    "output_node2 = ak.DenseBlock()(input_node2)\n",
    "\n",
    "output_node = ak.Merge()([output_node1, output_node2])\n",
    "output_node1 = ak.ClassificationHead()(output_node)\n",
    "output_node2 = ak.RegressionHead()(output_node)\n",
    "\n",
    "auto_model = ak.AutoModel(\n",
    "    inputs=[input_node1, input_node2],\n",
    "    outputs=[output_node1, output_node2],\n",
    "    overwrite=True,\n",
    "    max_trials=2,\n",
    ")\n",
    "\n",
    "image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)\n",
    "numerical_data = np.random.rand(num_instances, 20).astype(np.float32)\n",
    "regression_target = np.random.rand(num_instances, 1).astype(np.float32)\n",
    "classification_target = np.random.randint(5, size=num_instances)\n",
    "\n",
    "auto_model.fit(\n",
    "    [image_data, numerical_data],\n",
    "    [classification_target, regression_target],\n",
    "    batch_size=3,\n",
    "    epochs=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[ImageInput](/node/#imageinput-class),\n",
    "[Input](/node/#input-class),\n",
    "[DenseBlock](/block/#denseblock-class),\n",
    "[RegressionHead](/block/#regressionhead-class),\n",
    "[ClassificationHead](/block/#classificationhead-class),\n",
    "[CategoricalToNumerical](/block/#categoricaltonumerical-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "multi",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/structured_data_classification.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import keras\n",
    "import pandas as pd\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the [Titanic\n",
    "dataset](https://www.kaggle.com/c/titanic) as an example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "TRAIN_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/train.csv\"\n",
    "TEST_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/eval.csv\"\n",
    "\n",
    "train_file_path = keras.utils.get_file(\"train.csv\", TRAIN_DATA_URL)\n",
    "test_file_path = keras.utils.get_file(\"eval.csv\", TEST_DATA_URL)\n",
    "\n",
    "# Load data into numpy arrays\n",
    "train_df = pd.read_csv(train_file_path)\n",
    "test_df = pd.read_csv(test_file_path)\n",
    "\n",
    "y_train = train_df[\"survived\"].values\n",
    "x_train = train_df.drop(\"survived\", axis=1).values\n",
    "\n",
    "y_test = test_df[\"survived\"].values\n",
    "x_test = test_df.drop(\"survived\", axis=1).values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the\n",
    "[StructuredDataClassifier](/structured_data_classifier).\n",
    "As a quick demo, we set epochs to 10.\n",
    "You can also leave the epochs unspecified for an adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the structured data classifier.\n",
    "clf = ak.StructuredDataClassifier(\n",
    "    overwrite=True, max_trials=3\n",
    ")  # It tries 3 different models.\n",
    "# Feed the structured data classifier with training data.\n",
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    epochs=10,\n",
    ")\n",
    "# Predict with the best model.\n",
    "predicted_y = clf.predict(x_test)\n",
    "# Evaluate the best model with testing data.\n",
    "print(clf.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also specify the column names and types for the data as follows.  The\n",
    "`column_names` is optional if the training data already have the column names,\n",
    "e.g., a pandas.DataFrame or a CSV file.  Any column whose type is not specified will\n",
    "be inferred from the training data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the structured data classifier.\n",
    "clf = ak.StructuredDataClassifier(\n",
    "    column_names=[\n",
    "        \"sex\",\n",
    "        \"age\",\n",
    "        \"n_siblings_spouses\",\n",
    "        \"parch\",\n",
    "        \"fare\",\n",
    "        \"class\",\n",
    "        \"deck\",\n",
    "        \"embark_town\",\n",
    "        \"alone\",\n",
    "    ],\n",
    "    column_types={\"sex\": \"categorical\", \"fare\": \"numerical\"},\n",
    "    max_trials=10,  # It tries 10 different models.\n",
    "    overwrite=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data.  As\n",
    "shown in the example below, you can use `validation_split` to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set\n",
    "instead of splitting it from the training data with `validation_data`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 500\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    epochs=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[StructuredDataClassifier](/structured_data_classifier),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[StructuredDataBlock](/block/#structureddatablock-class),\n",
    "[DenseBlock](/block/#denseblock-class),\n",
    "[StructuredDataInput](/node/#structureddatainput-class),\n",
    "[ClassificationHead](/block/#classificationhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "structured_data_classification",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/structured_data_regression.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_california_housing\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the [California housing\n",
    "dataset](\n",
    "https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset)\n",
    "as an example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "house_dataset = fetch_california_housing()\n",
    "train_size = int(house_dataset.data.shape[0] * 0.9)\n",
    "\n",
    "x_train = house_dataset.data[:train_size]\n",
    "y_train = house_dataset.target[:train_size]\n",
    "x_test = house_dataset.data[train_size:]\n",
    "y_test = house_dataset.target[train_size:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the\n",
    "[StructuredDataRegressor](/structured_data_regressor).\n",
    "As a quick demo, we set epochs to 10.\n",
    "You can also leave the epochs unspecified for an adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the structured data regressor.\n",
    "reg = ak.StructuredDataRegressor(\n",
    "    overwrite=True, max_trials=3\n",
    ")  # It tries 3 different models.\n",
    "# Feed the structured data regressor with training data.\n",
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    epochs=10,\n",
    ")\n",
    "# Predict with the best model.\n",
    "predicted_y = reg.predict(x_test)\n",
    "# Evaluate the best model with testing data.\n",
    "print(reg.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also specify the column names and types for the data as follows.  The\n",
    "`column_names` argument is optional if the training data already has column\n",
    "names, e.g. a pandas.DataFrame or a CSV file.  Any column whose type is not\n",
    "specified will be inferred from the training data.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the structured data regressor.\n",
    "reg = ak.StructuredDataRegressor(\n",
    "    column_names=[\n",
    "        \"MedInc\",\n",
    "        \"HouseAge\",\n",
    "        \"AveRooms\",\n",
    "        \"AveBedrms\",\n",
    "        \"Population\",\n",
    "        \"AveOccup\",\n",
    "        \"Latitude\",\n",
    "        \"Longitude\",\n",
    "    ],\n",
    "    column_types={\"MedInc\": \"numerical\", \"Latitude\": \"numerical\"},\n",
    "    max_trials=10,  # It tries 10 different models.\n",
    "    overwrite=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data.  As\n",
    "shown in the example below, you can use `validation_split` to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set\n",
    "instead of splitting it from the training data with `validation_data`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 500\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    epochs=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[StructuredDataRegressor](/structured_data_regressor),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[StructuredDataBlock](/block/#structureddatablock-class),\n",
    "[DenseBlock](/block/#denseblock-class),\n",
    "[StructuredDataInput](/node/#structureddatainput-class),\n",
    "[RegressionHead](/block/#regressionhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "structured_data_regression",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/text_classification.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import keras\n",
    "import numpy as np\n",
    "from sklearn.datasets import load_files\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the [IMDB\n",
    "dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n",
    "as an example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "dataset = keras.utils.get_file(\n",
    "    fname=\"aclImdb.tar.gz\",\n",
    "    origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
    "    extract=True,\n",
    ")\n",
    "\n",
    "# set path to dataset\n",
    "IMDB_DATADIR = os.path.join(\n",
    "    os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n",
    ")\n",
    "\n",
    "classes = [\"pos\", \"neg\"]\n",
    "train_data = load_files(\n",
    "    os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n",
    ")\n",
    "test_data = load_files(\n",
    "    os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n",
    ")\n",
    "\n",
    "x_train = np.array(train_data.data)[:100]\n",
    "y_train = np.array(train_data.target)[:100]\n",
    "x_test = np.array(test_data.data)[:100]\n",
    "y_test = np.array(test_data.target)[:100]\n",
    "\n",
    "print(x_train.shape)  # (25000,)\n",
    "print(y_train.shape)  # (25000, 1)\n",
    "print(x_train[0][:50])  # this film was just brilliant casting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the [TextClassifier](/text_classifier).  As a quick\n",
    "demo, we set epochs to 1.  You can also leave the epochs unspecified for an\n",
    "adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the text classifier.\n",
    "clf = ak.TextClassifier(\n",
    "    overwrite=True, max_trials=1\n",
    ")  # It only tries 1 model as a quick demo.\n",
    "# Feed the text classifier with training data.\n",
    "clf.fit(x_train, y_train, epochs=1, batch_size=2)\n",
    "# Predict with the best model.\n",
    "predicted_y = clf.predict(x_test)\n",
    "# Evaluate the best model with testing data.\n",
    "print(clf.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data.  As\n",
    "shown in the example below, you can use `validation_split` to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    "    epochs=1,\n",
    "    batch_size=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set instead of splitting it from the\n",
    "training data with `validation_data`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 5\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "clf.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    epochs=1,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    batch_size=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Customized Search Space\n",
    "For advanced users, you may customize your search space by using\n",
    "[AutoModel](/auto_model/#automodel-class) instead of\n",
    "[TextClassifier](/text_classifier). You can configure the\n",
    "[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n",
    "also leave these arguments unspecified, which would leave the different choices\n",
    "to be tuned automatically.  See the following example for detail.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.TextInput()\n",
    "output_node = ak.TextBlock()(input_node)\n",
    "output_node = ak.ClassificationHead()(output_node)\n",
    "clf = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "clf.fit(x_train, y_train, epochs=1, batch_size=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[TextClassifier](/text_classifier),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[ConvBlock](/block/#convblock-class),\n",
    "[TextInput](/node/#textinput-class),\n",
    "[ClassificationHead](/block/#classificationhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "text_classification",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/ipynb/text_regression.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "!export KERAS_BACKEND=\"torch\"\n",
    "!pip install autokeras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import keras\n",
    "import numpy as np\n",
    "from sklearn.datasets import load_files\n",
    "\n",
    "import autokeras as ak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "To make this tutorial easy to follow, we just treat the IMDB dataset as a\n",
    "regression dataset. It means we will treat the prediction targets of the IMDB\n",
    "dataset, which are 0s and 1s, as numerical values, so that they can be directly\n",
    "used as the regression targets.\n",
    "\n",
    "## A Simple Example\n",
    "The first step is to prepare your data. Here we use the [IMDB\n",
    "dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)\n",
    "as an example.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "dataset = keras.utils.get_file(\n",
    "    fname=\"aclImdb.tar.gz\",\n",
    "    origin=\"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\",\n",
    "    extract=True,\n",
    ")\n",
    "\n",
    "# set path to dataset\n",
    "IMDB_DATADIR = os.path.join(\n",
    "    os.path.dirname(dataset), \"aclImdb_extracted\", \"aclImdb\"\n",
    ")\n",
    "\n",
    "classes = [\"pos\", \"neg\"]\n",
    "train_data = load_files(\n",
    "    os.path.join(IMDB_DATADIR, \"train\"), shuffle=True, categories=classes\n",
    ")\n",
    "test_data = load_files(\n",
    "    os.path.join(IMDB_DATADIR, \"test\"), shuffle=False, categories=classes\n",
    ")\n",
    "\n",
    "x_train = np.array(train_data.data)[:100]\n",
    "y_train = np.array(train_data.target)[:100]\n",
    "x_test = np.array(test_data.data)[:100]\n",
    "y_test = np.array(test_data.target)[:100]\n",
    "\n",
    "print(x_train.shape)  # (25000,)\n",
    "print(y_train.shape)  # (25000, 1)\n",
    "print(x_train[0][:50])  # <START> this film was just brilliant casting <UNK>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "The second step is to run the [TextRegressor](/text_regressor).  As a quick\n",
    "demo, we set epochs to 1.  You can also leave the epochs unspecified for an\n",
    "adaptive number of epochs.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Initialize the text regressor.\n",
    "reg = ak.TextRegressor(\n",
    "    overwrite=True, max_trials=1  # It only tries 1 model as a quick demo.\n",
    ")\n",
    "# Feed the text regressor with training data.\n",
    "reg.fit(x_train, y_train, epochs=1, batch_size=2)\n",
    "# Predict with the best model.\n",
    "predicted_y = reg.predict(x_test)\n",
    "# Evaluate the best model with testing data.\n",
    "print(reg.evaluate(x_test, y_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Validation Data\n",
    "By default, AutoKeras uses the last 20% of training data as validation data.  As\n",
    "shown in the example below, you can use `validation_split` to specify the\n",
    "percentage.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    # Split the training data and use the last 15% as validation data.\n",
    "    validation_split=0.15,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "You can also use your own validation set instead of splitting it from the\n",
    "training data with `validation_data`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "split = 5\n",
    "x_val = x_train[split:]\n",
    "y_val = y_train[split:]\n",
    "x_train = x_train[:split]\n",
    "y_train = y_train[:split]\n",
    "reg.fit(\n",
    "    x_train,\n",
    "    y_train,\n",
    "    epochs=1,\n",
    "    # Use your own validation set.\n",
    "    validation_data=(x_val, y_val),\n",
    "    batch_size=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Customized Search Space\n",
    "For advanced users, you may customize your search space by using\n",
    "[AutoModel](/auto_model/#automodel-class) instead of\n",
    "[TextRegressor](/text_regressor). You can configure the\n",
    "[TextBlock](/block/#textblock-class) for some high-level configurations. You can\n",
    "also leave these arguments unspecified, which would leave the different choices\n",
    "to be tuned automatically.  See the following example for detail.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "input_node = ak.TextInput()\n",
    "output_node = ak.TextBlock()(input_node)\n",
    "output_node = ak.RegressionHead()(output_node)\n",
    "reg = ak.AutoModel(\n",
    "    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1\n",
    ")\n",
    "reg.fit(x_train, y_train, epochs=1, batch_size=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text"
   },
   "source": [
    "## Reference\n",
    "[TextRegressor](/text_regressor),\n",
    "[AutoModel](/auto_model/#automodel-class),\n",
    "[TextBlock](/block/#textblock-class),\n",
    "[ConvBlock](/block/#convblock-class),\n",
    "[TextInput](/node/#textinput-class),\n",
    "[RegressionHead](/block/#regressionhead-class).\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "text_regression",
   "private_outputs": false,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

================================================
FILE: docs/keras_autodoc/__init__.py
================================================
from .autogen import DocumentationGenerator
from .gathering_members import get_classes
from .gathering_members import get_functions
from .gathering_members import get_methods


================================================
FILE: docs/keras_autodoc/autogen.py
================================================
import pathlib
import shutil
from inspect import getdoc
from inspect import isclass
from typing import Dict
from typing import List
from typing import Union
from typing import get_type_hints

from . import utils
from .docstring import process_docstring
from .examples import copy_examples
from .get_signatures import get_signature


class DocumentationGenerator:
    """Generates the documentation.

    # Arguments

        pages: A dictionary. The keys are the files' paths, the values
            are lists of strings, functions / classes / methods names
            with dotted access to the object. For example,
            `pages = {'my_file.md': ['keras.layers.Dense']}` is valid.
            When omitted, every instance gets its own empty dictionary.
        project_url: The url pointing to the module directory of your project on
            GitHub. This will be used to make a `[Sources]` link.
        template_dir: Where to put the markdown files which will be copied and
            filled in the destination directory. You should put files like
            `index.md` inside. If you want a markdown file to be filled with
            the docstring of a function, use the `{{autogenerated}}` tag inside,
            and then add the markdown file to the `pages` dictionary.
        examples_dir: Where you store examples in your project. Usually
            standalone files with a markdown docstring at the top. Will be
            inserted in the docs.
        extra_aliases: When displaying type hints, it's possible that the full
            dotted path is displayed instead of alias. The aliases present in
            `pages` are used, but it may happen if you're using a third-party
            library. For example `tensorflow.python.ops.variables.Variable` is
            displayed instead of `tensorflow.Variable`. Here you have two
            solutions: either you provide the import keras-autodoc should
            follow:
            `extra_aliases=["tensorflow.Variable"]`, or you provide a
            mapping to use
            `extra_aliases={"tensorflow.python.ops.variables.Variable":
            "tf.Variable"}`.  The second option should be used if you want more
            control and that you don't want to respect the alias corresponding
            to the import (you can't do `import tf.Variable`). When giving a
            list, keras-autodoc will try to import the object from the string to
            understand what object you want to replace.
        max_signature_line_length: When displaying class and function
            signatures, keras-autodoc formats them using Black. This parameter
            controls the maximum line length of these signatures, and is passed
            directly through to Black.
        titles_size: `"#"` signs to put before a title in the generated
            markdown.
    """

    def __init__(
        self,
        pages: Dict[str, list] = None,
        project_url: Union[str, Dict[str, str]] = None,
        template_dir=None,
        examples_dir=None,
        extra_aliases: Union[List[str], Dict[str, str]] = None,
        max_signature_line_length: int = 110,
        titles_size="###",
    ):
        # A `None` sentinel is used instead of a `{}` default so that instances
        # created without `pages` never share (and mutate) the same dictionary.
        self.pages = {} if pages is None else pages
        self.project_url = project_url
        self.template_dir = template_dir
        self.examples_dir = examples_dir
        # Maps a class' real dotted path to the alias that should be displayed.
        self.class_aliases = {}
        self._fill_aliases(extra_aliases)
        self.max_signature_line_length = max_signature_line_length
        self.titles_size = titles_size

    def generate(self, dest_dir):
        """Generate the docs.

        Any existing `dest_dir` is deleted first, then the templates are
        copied into it (when `template_dir` is set), every page listed in
        `pages` is rendered, and finally the examples are copied (when
        `examples_dir` is set).

        # Arguments

            dest_dir: Where to put the resulting markdown files.
        """
        dest_dir = pathlib.Path(dest_dir)
        print("Cleaning up existing sources directory.")
        if dest_dir.exists():
            shutil.rmtree(dest_dir)

        print("Populating sources directory with templates.")
        if self.template_dir:
            shutil.copytree(self.template_dir, dest_dir)

        for file_path, elements in self.pages.items():
            # Concatenate the rendered markdown of every element of the page.
            markdown_text = ""
            for element in elements:
                markdown_text += self._render(element)
            utils.insert_in_file(markdown_text, dest_dir / file_path)

        if self.examples_dir is not None:
            copy_examples(self.examples_dir, dest_dir / "examples")

    def process_docstring(self, docstring, types: dict = None):
        """Convert a raw docstring to markdown. Can be overridden.

        # Arguments

            docstring: The docstring text to convert.
            types: Optional mapping of argument names to type hints.

        # Returns

            The processed docstring.
        """
        # Inside a method this name resolves to the module-level function,
        # not to this method.
        processed = process_docstring(docstring, types, self.class_aliases)
        return processed

    def process_signature(self, signature):
        """Hook to post-process a signature; returns it unchanged by default.

        Can be overridden.
        """
        return signature

    def _render(self, element):
        """Render one entry of `pages` (a dotted path or an object)."""
        if isinstance(element, str):
            object_ = utils.import_object(element)
            if utils.ismethod(object_):
                # we remove the modules when displaying the methods
                signature_override = ".".join(element.split(".")[-2:])
            else:
                signature_override = element
        else:
            signature_override = None
            object_ = element

        return self._render_from_object(object_, signature_override)

    def _render_from_object(self, object_, signature_override: str):
        """Build the markdown block (source link, title, signature, docstring)
        for `object_`, terminated by a horizontal rule."""
        subblocks = []
        if self.project_url is not None:
            subblocks.append(utils.make_source_link(object_, self.project_url))
        signature = get_signature(
            object_, signature_override, self.max_signature_line_length
        )
        signature = self.process_signature(signature)
        subblocks.append(f"{self.titles_size} {object_.__name__}\n")
        subblocks.append(utils.code_snippet(signature))

        docstring = getdoc(object_)
        if docstring:
            # For classes the documented arguments are those of `__init__`.
            if isclass(object_):
                type_hints = get_type_hints(object_.__init__)
            else:
                type_hints = get_type_hints(object_)
            docstring = self.process_docstring(docstring, type_hints)
            subblocks.append(docstring)
        return "\n\n".join(subblocks) + "\n\n----\n\n"

    def _fill_aliases(self, extra_aliases):
        """Populate `class_aliases` from `pages`, then from `extra_aliases`.

        `extra_aliases` may be a dict (used as is), a list of importable
        dotted paths, or `None` (ignored).
        """
        for list_elements in self.pages.values():
            for element_as_str in list_elements:
                element = utils.import_object(element_as_str)
                if not isclass(element):
                    continue
                true_dotted_path = utils.get_dotted_path(element)
                self.class_aliases[true_dotted_path] = element_as_str

        if isinstance(extra_aliases, dict):
            self.class_aliases.update(extra_aliases)
        elif isinstance(extra_aliases, list):
            for alias in extra_aliases:
                full_dotted_path = utils.get_dotted_path(
                    utils.import_object(alias)
                )
                self.class_aliases[full_dotted_path] = alias


================================================
FILE: docs/keras_autodoc/docstring.py
================================================
import itertools
import re

from sphinx.util.typing import stringify_annotation

from . import utils


def get_code_blocks(docstring):
    """Swap every fenced code block of `docstring` for a placeholder token.

    Returns a `(code_blocks, docstring)` tuple where `code_blocks` maps each
    placeholder token to the snippet it stands for, and `docstring` is the
    text with the snippets replaced by their tokens.
    """
    fence = "```"
    snippets = {}
    remaining = docstring
    while True:
        opening = remaining.find(fence)
        if opening == -1:
            break
        remaining = remaining[opening:]
        closing = remaining.find(fence, len(fence))
        # With a closing fence the snippet runs through it; without one the
        # snippet is the opening fence plus the two characters after it.
        cut = closing + len(fence) if closing != -1 else 5
        snippet = remaining[:cut]
        # Place marker in docstring for later reinjection.
        placeholder = f"$KERAS_AUTODOC_CODE_BLOCK_{len(snippets)}"
        docstring = docstring.replace(snippet, placeholder)
        snippets[placeholder] = snippet
        remaining = remaining[cut:]

    return snippets, docstring


def get_section_end(docstring, section_start):
    """Return the index in `docstring` where the section beginning at
    `section_start` stops.

    The section stops at the first newline run (after a non-blank character)
    that is followed by a non-indented character or by the end of the text.
    """
    boundary = re.search(r"\S\n+(\S|$)", docstring[section_start:])
    stop = boundary.end() + section_start
    # When the section is not the last thing in the docstring, step back
    # by two characters from the end of the match.
    return stop if stop == len(docstring) else stop - 2


def get_google_style_sections_without_code(docstring):
    """Pull every `# Title` section out of a code-free docstring.

    Each section found is stored under a placeholder token, and the token
    takes the section's place in the docstring. Returns the
    `(sections, docstring)` pair.
    """
    header_pattern = re.compile(r"\n# .+?\n")

    sections = {}
    counter = 0
    while True:
        header = header_pattern.search(docstring)
        if header is None:
            break
        # Skip the newline that precedes the "# Title" line.
        start = header.start() + 1
        stop = get_section_end(docstring, start)
        placeholder = f"KERAS_AUTODOC_GOOGLE_STYLE_SECTION_{counter}"
        sections[placeholder] = docstring[start:stop]
        docstring = utils.insert_in_string(docstring, placeholder, start, stop)
        counter += 1
    return sections, docstring


def get_google_style_sections(docstring):
    """Extract the `# Title` sections of a docstring that may hold code.

    Returns a `(sections, docstring)` tuple: `sections` maps a placeholder
    token to the section text, and `docstring` is the input with every
    section replaced by its token. Code blocks are restored in both.
    """
    # First, extract code blocks and process them.
    # The parsing is easier if the #, : and other symbols aren't there.
    code_blocks, without_code = get_code_blocks(docstring)

    raw_sections, without_sections = get_google_style_sections_without_code(
        without_code
    )

    # Put the code snippets back, both in the remaining text and in sections.
    restored_sections = {
        token: reinject_strings(section, code_blocks)
        for token, section in raw_sections.items()
    }
    return restored_sections, reinject_strings(without_sections, code_blocks)


def to_markdown(
    google_style_section: str, types: dict = None, aliases=None
) -> str:
    """Render one `# Title` section as markdown.

    The title becomes a bold line. The body is de-indented, and for the
    "Arguments", "Attributes" and "Raises" sections it is turned into a
    markdown list (with type hints for "Arguments" only).
    """
    newline_at = google_style_section.find("\n")
    # The first two characters of the header line are the "# " prefix.
    title = google_style_section[2:newline_at]
    body = utils.remove_indentation(
        google_style_section[newline_at + 1 :].strip()
    )

    # it's a list of elements, a special formatting is applied.
    if title == "Arguments":
        body = format_as_markdown_list(body, types, aliases)
    elif title == "Attributes" or title == "Raises":
        body = format_as_markdown_list(body)

    rendered = f"__{title}__\n"
    if body:
        rendered += f"\n{body}\n"
    return rendered


def format_as_markdown_list(
    section_body, types: dict = None, aliases: dict = None
):
    """Turn `name: description` entries into a markdown bullet list.

    Every non-indented `name:` at the start of a line becomes
    `- __name__:`. When `types` is given, the type hint of each listed
    argument is appended (in backticks) right after its bold name.
    """
    bullet_rules = (
        (r"\n([^ ].*?):", r"\n- __\1__:"),  # entries after the first line
        (r"^([^ ].*?):", r"- __\1__:"),  # entry on the very first line
    )
    for pattern, template in bullet_rules:
        section_body = re.sub(pattern, template, section_body)

    if not types:
        return section_body

    # Optionally add type annotations to docstring
    for name, annotation in types.items():
        hint = apply_aliases(stringify_annotation(annotation), aliases)
        section_body = re.sub(
            rf"(- __{name}__)", rf"\1 `{hint}`", section_body
        )
    return section_body


def apply_aliases(string: str, aliases: dict):
    """Replace every dotted path found in `string` by its alias.

    # Arguments

        string: The text (typically a stringified type hint) to rewrite.
        aliases: Mapping of dotted paths to the alias to display. May be
            `None` or empty, in which case `string` is returned unchanged.

    # Returns

        The text with the aliases applied.
    """
    # `aliases` defaults to `None` in the callers, so it must be tolerated.
    if not aliases:
        return string
    for dotted_path, alias in aliases.items():
        string = string.replace(dotted_path, alias)
    return string


def reinject_strings(target, strings_to_inject):
    """Replace every placeholder token of `target` by its original string.

    # Arguments

        target: The text containing placeholder tokens.
        strings_to_inject: Mapping of placeholder tokens to the strings
            they stand for.

    # Returns

        The text with all the tokens substituted.
    """
    # Substitute the longest tokens first: numbered tokens such as `..._1`
    # are prefixes of `..._10`, and replacing the short one first would
    # corrupt the long one.
    for token in sorted(strings_to_inject, key=len, reverse=True):
        target = target.replace(token, strings_to_inject[token])
    return target


def process_docstring(docstring, types: dict = None, aliases=None):
    """Convert the `# Title` sections of a docstring to markdown.

    # Arguments

        docstring: The docstring text. A trailing newline is appended when
            missing (an empty string is accepted).
        types: Optional mapping of argument names to type hints, forwarded
            to the section renderer.
        aliases: Optional mapping of dotted paths to display aliases,
            forwarded to the section renderer.

    # Returns

        The docstring with every section replaced by its markdown rendering.
    """
    # `endswith` (rather than indexing `[-1]`) also works on an empty string.
    if not docstring.endswith("\n"):
        docstring += "\n"
    google_style_sections, docstring = get_google_style_sections(docstring)

    # Substitute the longest tokens first: the token of section 1 is a prefix
    # of the token of section 10, and would otherwise corrupt it.
    for token in sorted(google_style_sections, key=len, reverse=True):
        markdown_section = to_markdown(
            google_style_sections[token], types, aliases
        )
        docstring = docstring.replace(token, markdown_section)
    return docstring


================================================
FILE: docs/keras_autodoc/examples.py
================================================
import os
import pathlib


def copy_examples(examples_dir, destination_dir):
    """Copy the examples directory in the documentation.

    Prettify files by extracting the docstrings written in Markdown.

    # Arguments

        examples_dir: Directory containing the example `.py` files. May be a
            string or a path object.
        destination_dir: Directory receiving one `.md` file per example. It
            is created, along with missing parents, when it does not exist.
    """
    pathlib.Path(destination_dir).mkdir(parents=True, exist_ok=True)
    for file in os.listdir(examples_dir):
        if not file.endswith(".py"):
            continue
        module_path = os.path.join(examples_dir, file)
        docstring, starting_line = get_module_docstring(module_path)
        # "example.py" -> "example.md"
        destination_file = os.path.join(destination_dir, file[:-2] + "md")
        # Reuse module_path rather than `examples_dir / file`, which only
        # works for path objects and fails when examples_dir is a string.
        with open(destination_file, "w", encoding="utf-8") as f_out, open(
            module_path, "r", encoding="utf-8"
        ) as f_in:
            f_out.write(docstring + "\n\n")

            # skip docstring
            for _ in range(starting_line):
                next(f_in, None)

            f_out.write("```python\n")
            # next line might be empty, or missing when the file ends right
            # after the docstring.
            line = next(f_in, "\n")
            if line != "\n":
                f_out.write(line)

            # copy the rest of the file.
            for line in f_in:
                f_out.write(line)
            f_out.write("```")


def get_module_docstring(filepath):
    """Extract the module docstring.

    Also finds the line at which the docstring ends.

    # Arguments

        filepath: Path of the Python source file to inspect.

    # Returns

        A `(docstring, end_line)` tuple. `docstring` is the raw module
        docstring and `end_line` the 1-based number of its last line. When
        the module has no docstring, `("", 0)` is returned.
    """
    import ast

    # Read through a context manager so the file handle is always closed.
    with open(filepath, encoding="utf-8") as source_file:
        source = source_file.read()

    # Only a string expression opening the module is a docstring; inspecting
    # the first code constant would also pick up e.g. `x = "text"`.
    tree = ast.parse(source, filename=filepath)
    docstring = ast.get_docstring(tree, clean=False)
    if docstring is None:
        print("Could not get the docstring from " + str(filepath))
        return "", 0

    docstring_node = tree.body[0]
    # `end_lineno` is available from Python 3.8 on; fall back to `lineno`
    # on older interpreters.
    end_line = getattr(docstring_node, "end_lineno", docstring_node.lineno)
    return docstring, end_line


================================================
FILE: docs/keras_autodoc/gathering_members.py
================================================
import inspect
from inspect import isclass
from inspect import isfunction
from inspect import isroutine
from typing import List

from .utils import import_object


def get_classes(module, exclude: List[str] = None, return_strings: bool = True):
    """List the classes exposed by a module.

    # Arguments

        module: The module to inspect, or its dotted name as a string
            (`'keras.layers'` for example).
        exclude: Names to leave out of the result, as in
            `get_classes('keras.layers', exclude=['Dense', 'Conv2D'])`.
        return_strings: When True, dotted names are returned; when False, the
            class objects themselves. Prefer strings if you build your docs
            with aliases, since signatures computed from objects rely on
            `__name__` and `__module__`.

    # Returns

     A list of strings or a list of classes.
    """
    return _get_all_module_element(
        module, exclude=exclude, return_strings=return_strings, class_=True
    )


def get_functions(
    module, exclude: List[str] = None, return_strings: bool = True
):
    """List the functions exposed by a module.

    # Arguments

        module: The module to inspect, or its dotted name as a string
            (`'keras.backend'` for example).
        exclude: Names to leave out of the result, as in
            `get_functions('keras.backend', exclude=['max'])`.
        return_strings: When True, dotted names are returned; when False, the
            function objects themselves. Prefer strings if you build your
            docs with aliases, since signatures computed from objects rely on
            `__name__` and `__module__`.

    # Returns

     A list of strings or a list of functions.
    """
    return _get_all_module_element(
        module, exclude=exclude, return_strings=return_strings, class_=False
    )


def get_methods(cls, exclude=None, return_strings=True):
    """List the public methods of a class.

    # Arguments

        cls: The class to inspect, or its dotted name as a string
            (`'keras.layers.Dense'` for example).
        exclude: Names to leave out of the result, as in
            `get_methods('keras.Model', exclude=['save'])`.
        return_strings: When True, dotted names are returned; when False, the
            method objects themselves. Prefer strings if you build your docs
            with aliases, since signatures computed from objects rely on
            `__name__` and `__module__`.

    # Returns

     A list of strings or a list of methods.
    """
    if isinstance(cls, str):
        dotted_name, cls = cls, import_object(cls)
    else:
        dotted_name = f"{cls.__module__}.{cls.__name__}"
    skipped = exclude or []

    routines = (
        member for _, member in inspect.getmembers(cls, predicate=isroutine)
    )
    # Drop private/dunder routines and the explicitly excluded ones.
    public = [
        routine
        for routine in routines
        if not (routine.__name__[0] == "_" or routine.__name__ in skipped)
    ]
    if return_strings:
        return [f"{dotted_name}.{routine.__name__}" for routine in public]
    return public


def _get_all_module_element(module, exclude, return_strings, class_):
    """Collect the public classes or functions belonging to a module.

    # Arguments

        module: The module to inspect, or its dotted name as a string.
        exclude: Names to skip; `None` means nothing is excluded.
        return_strings: Return dotted names when True, the objects otherwise.
        class_: Collect classes when truthy, functions otherwise.

    # Returns

        The collected elements, sorted with `id` as the key.
    """
    if isinstance(module, str):
        module = import_object(module)
    skipped = exclude or []
    collected = []
    for name in dir(module):
        member = getattr(module, name)
        if not isfunction(member) and not isclass(member):
            continue
        if name[0] == "_" or name in skipped:
            continue
        # Ignore members whose defining module name does not contain this
        # module's name (e.g. objects imported from elsewhere).
        if module.__name__ not in member.__module__:
            continue
        if member in collected:
            continue
        if isclass(member) if class_ else isfunction(member):
            collected.append(
                f"{module.__name__}.{name}" if return_strings else member
            )
    collected.sort(key=id)
    return collected


================================================
FILE: docs/keras_autodoc/get_signatures.py
================================================
import inspect
import warnings

import black
from sphinx.util.inspect import signature
from sphinx.util.inspect import stringify_signature

from . import utils


def get_signature_start(function):
    """Build the dotted name that opens a rendered signature.

    Methods are prefixed with the name of their class, other callables with
    their module (for the Dense layer: 'keras.layers.Dense'). A callable
    without `__module__` triggers a warning and gets no prefix.
    """
    if utils.ismethod(function):
        owner = utils.get_class_from_method(function)
        return f"{owner.__name__}.{function.__name__}"

    try:
        module_name = function.__module__
    except AttributeError:
        warnings.warn(
            f"function {function} has no module. "
            f"It will not be included in the signature."
        )
        return f"{function.__name__}"
    return f"{module_name}.{function.__name__}"


def get_signature_end(function):
    """Render the parenthesized parameter list of `function`.

    Annotations are omitted, and for methods the leading `self` parameter
    is removed.
    """
    rendered = stringify_signature(signature(function), show_annotation=False)
    if not utils.ismethod(function):
        return rendered
    return rendered.replace("(self, ", "(").replace("(self)", "()")


def get_function_signature(function, override=None, max_line_length: int = 110):
    """Return the formatted signature of a function or method.

    `override`, when given, replaces the computed dotted name that opens
    the signature.
    """
    start = get_signature_start(function) if override is None else override
    return format_signature(
        start, get_signature_end(function), max_line_length
    )


def get_class_signature(cls, override=None, max_line_length: int = 110):
    """Return the formatted constructor signature of a class.

    The name defaults to `module.ClassName` unless `override` is given; the
    parameters are taken from the class `__init__`.
    """
    start = f"{cls.__module__}.{cls.__name__}" if override is None else override
    return format_signature(
        start, get_signature_end(cls.__init__), max_line_length
    )


def get_signature(object_, override=None, max_line_length: int = 110):
    """Return the formatted signature of a class, function or method."""
    if inspect.isclass(object_):
        build = get_class_signature
    else:
        build = get_function_signature
    return build(object_, override, max_line_length)


def format_signature(
    signature_start: str, signature_end: str, max_line_length: int = 110
):
    """Pretty-format a signature so long ones span several lines.

    The signature is turned into a stub function definition, formatted with
    black, and the formatted parameter list is glued back to the real name.
    """
    # A placeholder of the same length stands in for the (possibly dotted)
    # name so that the stub is a valid function definition.
    placeholder_name = "x" * len(signature_start)
    stub_source = f"def {placeholder_name + signature_end}:\n    pass\n"

    formatted_stub = black.format_str(
        stub_source, mode=black.FileMode(line_length=max_line_length)
    )

    return signature_start + extract_signature_end(formatted_stub)


def extract_signature_end(function_definition):
    """Return the text from the first `(` to the last `)`, both included."""
    opening = function_definition.find("(")
    closing = function_definition.rfind(")")
    return function_definition[opening : closing + 1]


================================================
FILE: docs/keras_autodoc/utils.py
================================================
import importlib
import inspect
import os
import re


def count_leading_spaces(s):
    """Return the index of the first non-whitespace character of `s`.

    Returns 0 when `s` holds no non-whitespace character at all.
    """
    first_visible = re.search(r"\S", s)
    return first_visible.start() if first_visible else 0


def insert_in_file(markdown_text, file_path):
    """Save module page.

    Either insert content into existing page,
    or create page otherwise.

    # Arguments

        markdown_text: The autogenerated Markdown content.
        file_path: Path object of the page to write. Missing parent
            directories are created.

    # Raises

        RuntimeError: If the page already exists but does not contain the
            `{{autogenerated}}` tag.
    """
    if file_path.exists():
        template = file_path.read_text(encoding="utf-8")
        if "{{autogenerated}}" not in template:
            # The tag is spelled in a plain (non f-) string: inside an
            # f-string `{{...}}` collapses to single braces, so the message
            # would name a tag that is not the one being searched for.
            raise RuntimeError(
                f"Template found for {file_path} but missing "
                "{{autogenerated}} tag."
            )
        markdown_text = template.replace("{{autogenerated}}", markdown_text)
        print("...inserting autogenerated content into template:", file_path)
    else:
        print("...creating new page with autogenerated content:", file_path)
    os.makedirs(file_path.parent, exist_ok=True)
    file_path.write_text(markdown_text, encoding="utf-8")


def code_snippet(snippet):
    """Wrap `snippet` in a fenced Markdown code block tagged as Python."""
    return "```python\n{}\n```\n".format(snippet)


def make_source_link(cls, project_url):
    """Build the right-floated `[source]` link pointing at `cls`.

    `project_url` is either the base URL itself or a dict mapping the
    top-level package name of `cls` to its base URL. The link targets the
    module file and the line where the source of `cls` starts.
    """
    module_name = cls.__module__
    if isinstance(project_url, dict):
        project_url = project_url[module_name.split(".")[0]]
    path = module_name.replace(".", "/")
    _, line = inspect.getsourcelines(cls)
    link = f"[[source]]({project_url}/{path}.py#L{line})"
    return '<span style="float:right;">' + link + "</span>"


def format_classes_list(classes, page_name):
    """Normalize a page's class list to `(class, methods)` pairs, in place.

    Entries that are not already a list or tuple are replaced by
    `(entry, [])`. Raises a TypeError when the first element of an entry is
    not a class. The (mutated) input list is returned.
    """
    # First pass: wrap bare entries so every item unpacks into two values.
    for index, entry in enumerate(classes):
        if not isinstance(entry, (list, tuple)):
            classes[index] = (entry, [])
    # Second pass: validate once the whole list is normalized.
    for class_, _methods in classes:
        if inspect.isclass(class_):
            continue
        # TODO: add a test for this
        raise TypeError(
            f"{class_} was given in the class list "
            f"of {page_name} but {class_} is not a Python class."
        )
    return classes


def get_class_from_method(meth):
    """Return the class defining `meth`, or None if it cannot be found.

    See
    https://stackoverflow.com/questions/3589311/
    get-defining-class-of-unbound-method-object-in-python-3/
    25959545#25959545

    Methods of nested classes (qualified name `Outer.Inner.method`) are
    resolved by walking the dotted path one attribute at a time.
    """
    if inspect.ismethod(meth):
        for cls in inspect.getmro(meth.__self__.__class__):
            if cls.__dict__.get(meth.__name__) is meth:
                return cls
        meth = meth.__func__  # fallback to __qualname__ parsing
    if inspect.isfunction(meth):
        # Drop everything from ".<locals>" on, then the function's own name:
        # what remains is the dotted path of the would-be owner.
        owner_path = meth.__qualname__.split(".<locals>", 1)[0]
        owner_path = owner_path.rsplit(".", 1)[0]
        # A single getattr cannot resolve a dotted path and would raise an
        # AttributeError; walk it step by step, None meaning "not found".
        cls = inspect.getmodule(meth)
        for attribute in owner_path.split("."):
            cls = getattr(cls, attribute, None)
        if isinstance(cls, type):
            return cls
    return getattr(
        meth, "__objclass__", None
    )  # handle special descriptor objects


def ismethod(function):
    """Tell whether a defining class can be found for `function`."""
    owner = get_class_from_method(function)
    return owner is not None


def import_object(string: str):
    """Resolve a dotted path to the object it designates.

    The path may point at a module, a function, a class or a method;
    `'keras.layers.Dense.get_weights'` is valid for example. Each prefix of
    the path is first tried as a module import; once that fails, the
    remaining names are looked up as attributes.
    """
    resolved = None
    names = string.split(".")
    for depth, name in enumerate(names, start=1):
        dotted_prefix = ".".join(names[:depth])
        try:
            resolved = importlib.import_module(dotted_prefix)
        except ModuleNotFoundError:
            resolved = getattr(resolved, name)
    return resolved


def get_type(object_) -> str:
    """Classify `object_` as `"class"`, `"method"` or `"function"`.

    Methods are tested before functions, so a function for which a defining
    class is found is reported as a method.

    Raises a TypeError when the object is none of the three.
    """
    if inspect.isclass(object_):
        return "class"
    elif ismethod(object_):
        return "method"
    elif inspect.isfunction(object_):
        return "function"
    else:
        # The trailing space after "nor" keeps the two message parts from
        # running together ("nora function").
        raise TypeError(
            f"{object_} is detected as neither a class, a method nor "
            f"a function."
        )


def insert_in_string(target, string_to_insert, start, end):
    """Replace `target[start:end]` by `string_to_insert`."""
    return target[:start] + string_to_insert + target[end:]


def remove_indentation(string):
    """Strip one level (four spaces) of indentation from every line."""
    # Lines after the first are reached through their preceding newline;
    # the first line is handled separately.
    dedented = string.replace("\n    ", "\n")
    return dedented[4:] if dedented[:4] == "    " else dedented


def get_dotted_path(class_):
    """Return `module.QualifiedName` for the given class."""
    return "{}.{}".format(class_.__module__, class_.__qualname__)


================================================
FILE: docs/mkdocs.yml
================================================
# MkDocs configuration for the AutoKeras documentation site.
site_name: AutoKeras
theme:
  favicon: '/img/favicon.png'
  logo: '/img/logo_white.svg'
  name: 'material'

# The Markdown pages are read from the `sources` directory.
docs_dir: sources
repo_url: https://github.com/keras-team/autokeras
site_url: http://autokeras.com
edit_uri: ""
site_description: 'Documentation for AutoKeras.'
markdown_extensions:
  - codehilite
  # Custom fence named `mermaid`, rendered as a div with class `mermaid`.
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_div_format
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - admonition

extra:
  analytics:
    provider: google
    property: G-GTF9QP8DFD

extra_css:
  - stylesheets/extra.css

# Mermaid is loaded from a CDN, pinned to version 8.4.4; presumably it renders
# the `mermaid` divs used in the tutorials.
extra_javascript:
  - https://unpkg.com/mermaid@8.4.4/dist/mermaid.min.js

# Site navigation: section titles mapped to Markdown pages.
nav:
  - Home: index.md
  - Installation: install.md
  - Tutorials:
    - Overview: tutorial/overview.md
    - Image Classification: tutorial/image_classification.md
    - Image Regression: tutorial/image_regression.md
    - Text Classification: tutorial/text_classification.md
    - Text Regression: tutorial/text_regression.md
    - Structured Data Classification: tutorial/structured_data_classification.md
    - Structured Data Regression: tutorial/structured_data_regression.md
    - Multi-Modal and Multi-Task: tutorial/multi.md
    - Customized Model: tutorial/customized.md
    - Export Model: tutorial/export.md
    - Load Data from Disk: tutorial/load.md
    - FAQ: tutorial/faq.md
  - Contributing Guide: contributing.md
  - Documentation:
    - ImageClassifier: image_classifier.md
    - ImageRegressor: image_regressor.md
    - TextClassifier: text_classifier.md
    - TextRegressor: text_regressor.md
    - StructuredDataClassifier: structured_data_classifier.md
    - StructuredDataRegressor: structured_data_regressor.md
    - AutoModel: auto_model.md
    - Base Class: base.md
    - Node: node.md
    - Block: block.md
    - Utils: utils.md
  - About: about.md


================================================
FILE: docs/py/customized.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import keras
import numpy as np
import tree
from keras.datasets import mnist

import autokeras as ak

"""
In this tutorial, we show how to customize your search space with
[AutoModel](/auto_model/#automodel-class) and how to implement your own block
as search space.  This API is mainly for advanced users who already know what
their model should look like.

## Customized Search Space
First, let us see how we can build the following neural network using the
building blocks in AutoKeras.

<div class="mermaid">
graph LR
    id1(ImageInput) --> id2(Normalization)
    id2 --> id3(Image Augmentation)
    id3 --> id4(Convolutional)
    id3 --> id5(ResNet V2)
    id4 --> id6(Merge)
    id5 --> id6
    id6 --> id7(Classification Head)
</div>

We can make use of the [AutoModel](/auto_model/#automodel-class) API in
AutoKeras to implement it as follows.
The usage is the same as the [Keras functional
API](https://keras.io/api/models/model/#with-the-functional-api).
Since this is just a demo, we use small values for `max_trials` and `epochs`.
"""


# Both branches below start from the normalized input and are merged again
# before the classification head.
# NOTE(review): the diagram above also shows an Image Augmentation step, but
# no ak.ImageAugmentation block is added here -- confirm which is intended.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node1 = ak.ConvBlock()(output_node)
output_node2 = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.Merge()([output_node1, output_node2])
output_node = ak.ClassificationHead()(output_node)

auto_model = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)

"""
While building the model, the blocks used need to follow this topology:
`Preprocessor` -> `Block` -> `Head`. `Normalization` and `ImageAugmentation`
are `Preprocessor`s.
`ClassificationHead` is `Head`. The rest are `Block`s.

In the code above, we use `ak.ResNetBlock(version='v2')` to specify the version
of ResNet to use.  There are many other arguments to specify for each building
block.  For most of the arguments, if not specified, they would be tuned
automatically.  Please refer to the documentation links at the bottom of the
page for more details.

Then, we prepare some data to run the model.
"""


(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

# Feed the AutoModel with training data.
# Only the first 100 training samples are used for this fit.
auto_model.fit(x_train[:100], y_train[:100], epochs=1)
# Predict with the best model.
predicted_y = auto_model.predict(x_test)
# Evaluate the best model with testing data.
print(auto_model.evaluate(x_test, y_test))

"""
For multiple input nodes and multiple heads search space, you can refer to
[this section](/tutorial/multi/#customized-search-space).

## Validation Data
If you would like to provide your own validation data or change the ratio of
the validation data, please refer to the Validation Data section of the
tutorials of [Image
Classification](/tutorial/image_classification/#validation-data), [Text
Classification](/tutorial/text_classification/#validation-data), [Structured
Data
Classification](/tutorial/structured_data_classification/#validation-data),
[Multi-task and Multiple Validation](/tutorial/multi/#validation-data).

## Implement New Block

You can extend the [Block](/base/#block-class)
class to implement your own building blocks and use it with
[AutoModel](/auto_model/#automodel-class).

The first step is to learn how to write a build function for
[KerasTuner](https://keras-team.github.io/keras-tuner/#usage-the-basics).  You
need to override the [build function](/base/#build-method) of the block.  The
following example shows how to implement a single Dense layer block whose
number of neurons is tunable.
"""


class SingleDenseLayerBlock(ak.Block):
    # A block made of one Dense layer whose number of units is searched.
    def build(self, hp, inputs=None):
        # `inputs` may be a nested structure; use its first flattened node.
        first_input = tree.flatten(inputs)[0]
        # Hyperparameter "num_units": an integer between 32 and 512, step 32.
        num_units = hp.Int("num_units", min_value=32, max_value=512, step=32)
        return keras.layers.Dense(num_units)(first_input)


"""
You can connect it with other blocks and build it into an
[AutoModel](/auto_model/#automodel-class).
"""

# Build the AutoModel
input_node = ak.Input()
output_node = SingleDenseLayerBlock()(input_node)
output_node = ak.RegressionHead()(output_node)
auto_model = ak.AutoModel(input_node, output_node, overwrite=True, max_trials=1)
# Prepare Data
# Random float32 arrays: 100 instances with 20 input values and 1 target each.
num_instances = 100
x_train = np.random.rand(num_instances, 20).astype(np.float32)
y_train = np.random.rand(num_instances, 1).astype(np.float32)
x_test = np.random.rand(num_instances, 20).astype(np.float32)
y_test = np.random.rand(num_instances, 1).astype(np.float32)
# Train the model
auto_model.fit(x_train, y_train, epochs=1)
print(auto_model.evaluate(x_test, y_test))

"""
## Reference

[AutoModel](/auto_model/#automodel-class)

**Nodes**:
[ImageInput](/node/#imageinput-class),
[Input](/node/#input-class),
[TextInput](/node/#textinput-class),
[StructuredDataInput](/node/#structureddatainput-class).

**Preprocessors**:
[FeatureEngineering](/block/#featureengineering-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[LightGBM](/block/#lightgbm-class),
[Normalization](/block/#normalization-class).

**Blocks**:
[ConvBlock](/block/#convblock-class),
[DenseBlock](/block/#denseblock-class),
[Embedding](/block/#embedding-class),
[Merge](/block/#merge-class),
[ResNetBlock](/block/#resnetblock-class),
[RNNBlock](/block/#rnnblock-class),
[SpatialReduction](/block/#spatialreduction-class),
[TemporalReduction](/block/#temporalreduction-class),
[XceptionBlock](/block/#xceptionblock-class),
[ImageBlock](/block/#imageblock-class),
[TextBlock](/block/#textblock-class),
[StructuredDataBlock](/block/#structureddatablock-class).
"""


================================================
FILE: docs/py/export.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import numpy as np
from keras.datasets import mnist
from keras.models import load_model

import autokeras as ak

"""
You can easily export the best model found by AutoKeras as a Keras
Model.

The following example uses [ImageClassifier](/image_classifier) as an example.
All the tasks and the [AutoModel](/auto_model/#automodel-class) have this
[export_model](/auto_model/#export_model-method) function.

"""


(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Initialize the image classifier.
clf = ak.ImageClassifier(
    overwrite=True, max_trials=1
)  # Try only 1 model.(Increase accordingly)
# Feed the image classifier with training data.
clf.fit(x_train, y_train, epochs=1)  # Change no of epochs to improve the model
# Export as a Keras Model.
model = clf.export_model()

# NOTE(review): the class path printed below depends on the installed Keras
# version -- confirm the expected output.
print(type(model))  # <class 'keras.engine.training.Model'>

# Save the exported model to a `.keras` file.
model.save("model_autokeras.keras")


# Load it back, passing the AutoKeras custom objects to the Keras loader.
loaded_model = load_model(
    "model_autokeras.keras", custom_objects=ak.CUSTOM_OBJECTS
)

# A trailing axis is appended to x_test before predicting (presumably the
# channel dimension expected by the exported model -- TODO confirm).
predicted_y = loaded_model.predict(np.expand_dims(x_test, -1))
print(predicted_y)


================================================
FILE: docs/py/image_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

from keras.datasets import mnist

import autokeras as ak

"""
## A Simple Example
The first step is to prepare your data. Here we use the MNIST dataset as an
example.
"""

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of each split (presumably to keep this demo
# fast).
x_train = x_train[:100]
y_train = y_train[:100]
x_test = x_test[:100]
y_test = y_test[:100]
print(x_train.shape)  # (100, 28, 28)
print(y_train.shape)  # (100,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

"""
The second step is to run the ImageClassifier.
It is recommended to have more trials for more complicated datasets.
This is just a quick demo of MNIST, so we set max_trials to 1.
For the same reason, we set epochs to 1.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the image classifier.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)
# Feed the image classifier with training data.
clf.fit(x_train, y_train, epochs=1)


# Predict with the best model.
# x_test was cut down to 100 samples above, so this runs on 100 images.
predicted_y = clf.predict(x_test)
print(predicted_y)


# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))

"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""

# Call fit() again on the same classifier, now with an explicit split.
clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
)

"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""

# x_train holds only the samples kept above, so the split point must fall
# inside that range; an index past the end (such as 50000 for 100 samples)
# leaves the validation set empty.
split = int(0.8 * len(x_train))
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=1,
)

"""
## Customized Search Space
For advanced users, you may customize your search space by using AutoModel
instead of ImageClassifier. You can configure the ImageBlock for some
high-level configurations, e.g., block_type for the type of neural network to
search, normalize for whether to do data normalization, augment for whether to
do data augmentation. You can also leave these arguments unspecified, which
would leave the different choices to be tuned automatically. See the following
example for details.
"""

input_node = ak.ImageInput()
output_node = ak.ImageBlock(
    # Only search ResNet architectures.
    block_type="resnet",
    # Normalize the dataset.
    normalize=True,
    # Do not do data augmentation.
    augment=False,
)(input_node)
output_node = ak.ClassificationHead()(output_node)
# From here on `clf` refers to this AutoModel, not the ImageClassifier above.
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1)

"""
The usage of AutoModel is similar to the functional API of Keras. Basically, you
are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node, use
`output_node = ak.[some_block]([block_args])(input_node)`.

You can also use more fine-grained blocks to customize the search space
even further. See the following example.
"""

# Chain the fine-grained blocks:
# Normalization -> ImageAugmentation -> ResNetBlock -> ClassificationHead.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.ClassificationHead()(output_node)
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1)

"""
## Reference
[ImageClassifier](/image_classifier),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/block/#normalization-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[ClassificationHead](/block/#classificationhead-class).
"""


================================================
FILE: docs/py/image_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

from keras.datasets import mnist

import autokeras as ak

"""
To make this tutorial easy to follow, we just treat MNIST dataset as a
regression dataset. It means we will treat prediction targets of MNIST dataset,
which are integers ranging from 0 to 9 as numerical values, so that they can be
directly used as the regression targets.

## A Simple Example
The first step is to prepare your data. Here we use the MNIST dataset as an
example.
"""


(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep only the first 100 samples of each split (presumably to keep this demo
# fast).
x_train = x_train[:100]
y_train = y_train[:100]
x_test = x_test[:100]
y_test = y_test[:100]
print(x_train.shape)  # (100, 28, 28)
print(y_train.shape)  # (100,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

"""
The second step is to run the ImageRegressor.  It is recommended to have more
trials for more complicated datasets.  This is just a quick demo of MNIST, so
we set max_trials to 1.  For the same reason, we set epochs to 1.  You can also
leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the image regressor.
reg = ak.ImageRegressor(overwrite=True, max_trials=1)
# Feed the image regressor with training data.
reg.fit(x_train, y_train, epochs=1)


# Predict with the best model.
# x_test was cut down to 100 samples above, so this runs on 100 images.
predicted_y = reg.predict(x_test)
print(predicted_y)


# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))

"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""

# Call fit() again on the same regressor, now with an explicit split.
reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
)

"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""

# x_train holds only the samples kept above, so the split point must fall
# inside that range; an index past the end (such as 50000 for 100 samples)
# leaves the validation set empty.
split = int(0.8 * len(x_train))
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=2,
)

"""
## Customized Search Space
For advanced users, you may customize your search space by using AutoModel
instead of ImageRegressor. You can configure the ImageBlock for some high-level
configurations, e.g., block_type for the type of neural network to search,
normalize for whether to do data normalization, augment for whether to do data
augmentation. You can also leave these arguments unspecified, which would leave
the different choices to be tuned automatically. See the following example for
details.
"""

input_node = ak.ImageInput()
output_node = ak.ImageBlock(
    # Only search ResNet architectures.
    block_type="resnet",
    # Do not normalize the dataset.
    normalize=False,
    # Do not do data augmentation.
    augment=False,
)(input_node)
output_node = ak.RegressionHead()(output_node)
# From here on `reg` refers to this AutoModel, not the ImageRegressor above.
reg = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1)

"""
The usage of AutoModel is similar to the functional API of Keras. Basically,
you are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node, use
`output_node = ak.[some_block]([block_args])(input_node)`.

You can also use more fine-grained blocks to customize the search space
even further. See the following example.
"""

# Chain the fine-grained blocks:
# Normalization -> ImageAugmentation -> ResNetBlock -> RegressionHead.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(horizontal_flip=False)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1)

"""
## Reference
[ImageRegressor](/image_regressor),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/block/#normalization-class),
[ImageAugmentation](/block/#imageaugmentation-class),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[RegressionHead](/block/#regressionhead-class).
"""


================================================
FILE: docs/py/multi.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import numpy as np

import autokeras as ak

"""
In this tutorial we are making use of the
[AutoModel](/auto_model/#automodel-class)
 API to show how to handle multi-modal data and multi-task.

## What is multi-modal?

Multi-modal data means each data instance has multiple forms of information.
For example, a photo can be saved as an image. Besides the image, it may also
have when and where it was taken as its attributes, which can be represented as
numerical data.

## What is multi-task?

By multi-task, we mean that we want to predict multiple targets with the same
input features. For example, we not only want to classify an image according to
its content, but we also want to regress its quality as a float number between
0 and 1.

The following diagram shows an example of multi-modal and multi-task neural
network model.

<div class="mermaid">
graph TD
    id1(ImageInput) --> id3(Some Neural Network Model)
    id2(Input) --> id3
    id3 --> id4(ClassificationHead)
    id3 --> id5(RegressionHead)
</div>

It has two inputs: the images and the numerical input data. Each image is
associated with a set of attributes in the numerical input data. From these
data, we are trying to predict the classification label and the regression value
at the same time.

## Data Preparation

To illustrate our idea, we generate some random image and numerical data as
the multi-modal data.
"""


num_instances = 10
# Generate image data.
image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)
# Generate numerical data.
numerical_data = np.random.rand(num_instances, 20).astype(np.float32)

"""
We also generate some multi-task targets for classification and regression.
"""

# Generate regression targets.
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
# Generate classification labels of five classes.
classification_target = np.random.randint(5, size=num_instances)

"""
## Build and Train the Model
Then we initialize the multi-modal and multi-task model with
[AutoModel](/auto_model/#automodel-class).
Since this is just a demo, we use a small number of `max_trials` and `epochs`.
"""


# Initialize the multi-modal, multi-task model with multiple inputs and
# outputs.
model = ak.AutoModel(
    inputs=[ak.ImageInput(), ak.Input()],
    outputs=[
        ak.RegressionHead(metrics=["mae"]),
        ak.ClassificationHead(
            loss="categorical_crossentropy", metrics=["accuracy"]
        ),
    ],
    overwrite=True,
    max_trials=2,
)
# Fit the model with prepared data. The inputs and targets are listed in the
# same order as the `inputs` and `outputs` given to AutoModel above.
model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    epochs=1,
    batch_size=3,
)

"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.
As shown in the example below, you can use `validation_split` to specify the
percentage.
"""

model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
    batch_size=3,
)

"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""

split = 5

image_val = image_data[split:]
numerical_val = numerical_data[split:]
regression_val = regression_target[split:]
classification_val = classification_target[split:]

image_data = image_data[:split]
numerical_data = numerical_data[:split]
regression_target = regression_target[:split]
classification_target = classification_target[:split]

model.fit(
    [image_data, numerical_data],
    [regression_target, classification_target],
    # Use your own validation set.
    validation_data=(
        [image_val, numerical_val],
        [regression_val, classification_val],
    ),
    epochs=1,
    batch_size=3,
)

"""
## Customized Search Space
You can customize your search space.
The following figure shows the search space we want to define.

<div class="mermaid">
graph LR
    id1(ImageInput) --> id2(Normalization)
    id2 --> id3(Image Augmentation)
    id3 --> id4(Convolutional)
    id3 --> id5(ResNet V2)
    id4 --> id6(Merge)
    id5 --> id6
    id7(Input) --> id9(DenseBlock)
    id6 --> id10(Merge)
    id9 --> id10
    id10 --> id11(Classification Head)
    id10 --> id12(Regression Head)
</div>
"""

# Image branch: normalize and augment the images, then feed the result to two
# parallel blocks (ConvBlock and ResNetBlock).
input_node1 = ak.ImageInput()
output_node = ak.Normalization()(input_node1)
output_node = ak.ImageAugmentation()(output_node)
output_node1 = ak.ConvBlock()(output_node)
output_node2 = ak.ResNetBlock(version="v2")(output_node)
# Merge the two parallel image blocks back into a single node.
output_node1 = ak.Merge()([output_node1, output_node2])

# Numerical branch. Note that the name output_node2 is reused here.
input_node2 = ak.Input()
output_node2 = ak.DenseBlock()(input_node2)

# Merge the image and numerical branches, then attach one head per task.
output_node = ak.Merge()([output_node1, output_node2])
output_node1 = ak.ClassificationHead()(output_node)
output_node2 = ak.RegressionHead()(output_node)

auto_model = ak.AutoModel(
    inputs=[input_node1, input_node2],
    outputs=[output_node1, output_node2],
    overwrite=True,
    max_trials=2,
)

# Regenerate the random data with num_instances rows; the arrays used earlier
# were sliced down to the first `split` rows.
image_data = np.random.rand(num_instances, 32, 32, 3).astype(np.float32)
numerical_data = np.random.rand(num_instances, 20).astype(np.float32)
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
classification_target = np.random.randint(5, size=num_instances)

# The targets follow the order of `outputs` above: the classification target
# first, then the regression target.
auto_model.fit(
    [image_data, numerical_data],
    [classification_target, regression_target],
    batch_size=3,
    epochs=1,
)

"""
## Reference
[AutoModel](/auto_model/#automodel-class),
[ImageInput](/node/#imageinput-class),
[Input](/node/#input-class),
[DenseBlock](/block/#denseblock-class),
[RegressionHead](/block/#regressionhead-class),
[ClassificationHead](/block/#classificationhead-class),
[CategoricalToNumerical](/block/#categoricaltonumerical-class).
"""


================================================
FILE: docs/py/structured_data_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import keras
import pandas as pd

import autokeras as ak

"""
## A Simple Example
The first step is to prepare your data. Here we use the [Titanic
dataset](https://www.kaggle.com/c/titanic) as an example.
"""


TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

train_file_path = keras.utils.get_file("train.csv", TRAIN_DATA_URL)
test_file_path = keras.utils.get_file("eval.csv", TEST_DATA_URL)

# Load data into numpy arrays
train_df = pd.read_csv(train_file_path)
test_df = pd.read_csv(test_file_path)

y_train = train_df["survived"].values
x_train = train_df.drop("survived", axis=1).values

y_test = test_df["survived"].values
x_test = test_df.drop("survived", axis=1).values


"""
The second step is to run the
[StructuredDataClassifier](/structured_data_classifier).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data classifier.
clf = ak.StructuredDataClassifier(
    overwrite=True, max_trials=3
)  # It tries 3 different models.
# Feed the structured data classifier with training data.
clf.fit(
    x_train,
    y_train,
    epochs=10,
)
# Predict with the best model.
predicted_y = clf.predict(x_test)
# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))


"""
You can also specify the column names and types for the data as follows.  The
`column_names` is optional if the training data already have the column names,
e.g.  pandas.DataFrame, CSV file.  Any column, whose type is not specified will
be inferred from the training data.
"""

# Initialize the structured data classifier.
clf = ak.StructuredDataClassifier(
    column_names=[
        "sex",
        "age",
        "n_siblings_spouses",
        "parch",
        "fare",
        "class",
        "deck",
        "embark_town",
        "alone",
    ],
    column_types={"sex": "categorical", "fare": "numerical"},
    max_trials=10,  # It tries 10 different models.
    overwrite=True,
)


"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.  As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""

clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=10,
)

"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""

split = 500
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=10,
)


"""
## Reference
[StructuredDataClassifier](/structured_data_classifier),
[AutoModel](/auto_model/#automodel-class),
[StructuredDataBlock](/block/#structureddatablock-class),
[DenseBlock](/block/#denseblock-class),
[StructuredDataInput](/node/#structureddatainput-class),
[ClassificationHead](/block/#classificationhead-class).
"""


================================================
FILE: docs/py/structured_data_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

from sklearn.datasets import fetch_california_housing

import autokeras as ak

"""
## A Simple Example
The first step is to prepare your data. Here we use the [California housing
dataset](
https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset)
as an example.
"""


house_dataset = fetch_california_housing()
train_size = int(house_dataset.data.shape[0] * 0.9)

x_train = house_dataset.data[:train_size]
y_train = house_dataset.target[:train_size]
x_test = house_dataset.data[train_size:]
y_test = house_dataset.target[train_size:]

"""
The second step is to run the
[StructuredDataRegressor](/structured_data_regressor).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data regressor.
reg = ak.StructuredDataRegressor(
    overwrite=True, max_trials=3
)  # It tries 3 different models.
# Feed the structured data regressor with training data.
reg.fit(
    x_train,
    y_train,
    epochs=10,
)
# Predict with the best model.
predicted_y = reg.predict(x_test)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))

"""
You can also specify the column names and types for the data as follows.  The
`column_names` is optional if the training data already have the column names,
e.g.  pandas.DataFrame, CSV file.  Any column, whose type is not specified will
be inferred from the training data.
"""

# Initialize the structured data regressor.
reg = ak.StructuredDataRegressor(
    column_names=[
        "MedInc",
        "HouseAge",
        "AveRooms",
        "AveBedrms",
        "Population",
        "AveOccup",
        "Latitude",
        "Longitude",
    ],
    column_types={"MedInc": "numerical", "Latitude": "numerical"},
    max_trials=10,  # It tries 10 different models.
    overwrite=True,
)


"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.  As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""

reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=10,
)

"""
You can also use your own validation set
instead of splitting it from the training data with `validation_data`.
"""

split = 500
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=10,
)


"""
## Reference
[StructuredDataRegressor](/structured_data_regressor),
[AutoModel](/auto_model/#automodel-class),
[StructuredDataBlock](/block/#structureddatablock-class),
[DenseBlock](/block/#denseblock-class),
[StructuredDataInput](/node/#structureddatainput-class),
[RegressionHead](/block/#regressionhead-class).
"""


================================================
FILE: docs/py/text_classification.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import os

import keras
import numpy as np
from sklearn.datasets import load_files

import autokeras as ak

"""
## A Simple Example
The first step is to prepare your data. Here we use the [IMDB
dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)
as an example.
"""


# Download the IMDB archive and extract it.
dataset = keras.utils.get_file(
    fname="aclImdb.tar.gz",
    origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True,
)

# set path to dataset
IMDB_DATADIR = os.path.join(
    os.path.dirname(dataset), "aclImdb_extracted", "aclImdb"
)

# Load only the "pos" and "neg" folders of each split.
classes = ["pos", "neg"]
train_data = load_files(
    os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes
)
test_data = load_files(
    os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes
)

# Keep only the first 100 samples of each split.
x_train = np.array(train_data.data)[:100]
y_train = np.array(train_data.target)[:100]
x_test = np.array(test_data.data)[:100]
y_test = np.array(test_data.target)[:100]

print(x_train.shape)  # (100,)
print(y_train.shape)  # (100,)
print(x_train[0][:50])  # The beginning of the first training review.

"""
The second step is to run the [TextClassifier](/text_classifier).  As a quick
demo, we set epochs to 1.  You can also leave the epochs unspecified for an
adaptive number of epochs.
"""


# Initialize the text classifier.
clf = ak.TextClassifier(
    overwrite=True, max_trials=1
)  # It only tries 1 model as a quick demo.
# Feed the text classifier with training data.
clf.fit(x_train, y_train, epochs=1, batch_size=2)
# Predict with the best model.
predicted_y = clf.predict(x_test)
# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))


"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.  As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""

clf.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=1,
    batch_size=2,
)

"""
You can also use your own validation set instead of splitting it from the
training data with `validation_data`.
"""

split = 5
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
clf.fit(
    x_train,
    y_train,
    epochs=1,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    batch_size=2,
)

"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[TextClassifier](/text_classifier). You can configure the
[TextBlock](/block/#textblock-class) for some high-level configurations. You can
also leave these arguments unspecified, which leaves the different choices to
be tuned automatically.  See the following example for detail.
"""


input_node = ak.TextInput()
output_node = ak.TextBlock()(input_node)
output_node = ak.ClassificationHead()(output_node)
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
clf.fit(x_train, y_train, epochs=1, batch_size=2)

"""
## Reference
[TextClassifier](/text_classifier),
[AutoModel](/auto_model/#automodel-class),
[ConvBlock](/block/#convblock-class),
[TextInput](/node/#textinput-class),
[ClassificationHead](/block/#classificationhead-class).
"""


================================================
FILE: docs/py/text_regression.py
================================================
"""shell
export KERAS_BACKEND="torch"
pip install autokeras
"""

import os

import keras
import numpy as np
from sklearn.datasets import load_files

import autokeras as ak

"""
To make this tutorial easy to follow, we just treat IMDB dataset as a
regression dataset. It means we will treat prediction targets of IMDB dataset,
which are 0s and 1s as numerical values, so that they can be directly used as
the regression targets.

## A Simple Example
The first step is to prepare your data. Here we use the [IMDB
dataset](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)
as an example.
"""


# Download the IMDB archive and extract it.
dataset = keras.utils.get_file(
    fname="aclImdb.tar.gz",
    origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True,
)

# set path to dataset
IMDB_DATADIR = os.path.join(
    os.path.dirname(dataset), "aclImdb_extracted", "aclImdb"
)

# Load only the "pos" and "neg" folders of each split.
classes = ["pos", "neg"]
train_data = load_files(
    os.path.join(IMDB_DATADIR, "train"), shuffle=True, categories=classes
)
test_data = load_files(
    os.path.join(IMDB_DATADIR, "test"), shuffle=False, categories=classes
)

# Keep only the first 100 samples of each split.
x_train = np.array(train_data.data)[:100]
y_train = np.array(train_data.target)[:100]
x_test = np.array(test_data.data)[:100]
y_test = np.array(test_data.target)[:100]

print(x_train.shape)  # (100,)
print(y_train.shape)  # (100,)
print(x_train[0][:50])  # The beginning of the first training review.

"""
The second step is to run the [TextRegressor](/text_regressor).  As a quick
demo, we set epochs to 1.  You can also leave the epochs unspecified for an
adaptive number of epochs.
"""


# Initialize the text regressor.
reg = ak.TextRegressor(
    overwrite=True, max_trials=1  # It only tries 1 model as a quick demo.
)
# Feed the text regressor with training data.
reg.fit(x_train, y_train, epochs=1, batch_size=2)
# Predict with the best model.
predicted_y = reg.predict(x_test)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))


"""
## Validation Data
By default, AutoKeras uses the last 20% of training data as validation data.  As
shown in the example below, you can use `validation_split` to specify the
percentage.
"""

reg.fit(
    x_train,
    y_train,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    # Keep the demo quick, like the other fit calls in this tutorial.
    epochs=1,
    batch_size=2,
)

"""
You can also use your own validation set instead of splitting it from the
training data with `validation_data`.
"""

split = 5
x_val = x_train[split:]
y_val = y_train[split:]
x_train = x_train[:split]
y_train = y_train[:split]
reg.fit(
    x_train,
    y_train,
    epochs=1,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    batch_size=2,
)

"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[TextRegressor](/text_regressor). You can configure the
[TextBlock](/block/#textblock-class) for some high-level configurations. You can
also leave these arguments unspecified, which leaves the different choices to
be tuned automatically.  See the following example for detail.
"""


input_node = ak.TextInput()
output_node = ak.TextBlock()(input_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)
reg.fit(x_train, y_train, epochs=1, batch_size=2)

"""
## Reference
[TextRegressor](/text_regressor),
[AutoModel](/auto_model/#automodel-class),
[TextBlock](/block/#textblock-class),
[ConvBlock](/block/#convblock-class),
[TextInput](/node/#textinput-class),
[RegressionHead](/block/#regressionhead-class).
"""


================================================
FILE: docs/requirements.txt
================================================
mkdocs
mkdocs-material
pygments
jupyter
pymdown-extensions
Sphinx
markdown
black

================================================
FILE: docs/run_py_files.sh
================================================
#!/bin/bash

# Runs every tutorial script matched by py/*.py and records each script's path
# in success.txt or failure.txt depending on its exit status.
# The glob is relative, so run this from the directory that contains py/.

SUCCESS_FILE="success.txt"
FAILURE_FILE="failure.txt"

# Truncate (or create) both result files.
: > "$SUCCESS_FILE"
: > "$FAILURE_FILE"

for script in py/*.py; do
    # The script's output is discarded; only its exit status matters.
    result_file="$FAILURE_FILE"
    if python3 "$script" > /dev/null 2>&1; then
        result_file="$SUCCESS_FILE"
    fi
    echo "$script" >> "$result_file"
done

================================================
FILE: docs/templates/about.md
================================================
This package is developed by [DATA LAB](http://faculty.cs.tamu.edu/xiahu/) at Texas A&M University,
collaborating with [keras-team](https://github.com/keras-team) for version 1.0 and above.

## Core Team

[**Haifeng Jin**](https://github.com/haifeng-jin):
Created, designed and implemented the AutoKeras system. 
Maintainer.

[**François Chollet**](https://github.com/fchollet):
The API and system architecture design for AutoKeras 1.0.
Code reviews for pull requests.

[**Qingquan Song**](https://github.com/song3134):
Designed the neural architecture search algorithms.
Implemented the tabular data classification and regression module.

[**Xia "Ben" Hu**](http://faculty.cs.tamu.edu/xiahu/):
Project lead and maintainer.


================================================
FILE: docs/templates/index.md
================================================
#
<img src="/img/row_red.svg" alt="drawing" width="400px" style="display: block; margin-left: auto; margin-right: auto"/>

##

{{autogenerated}}


================================================
FILE: docs/templates/install.md
================================================
## Requirements

**Python 3**: Follow the TensorFlow install steps to install Python 3.

**Pip**: Follow the TensorFlow install steps to install Pip.

**PyTorch >= 2.3.0**: AutoKeras is based on Keras. We recommend using the
PyTorch backend.
Please follow [this page](https://pytorch.org/get-started/locally/) to install
PyTorch.

## Install AutoKeras
AutoKeras only supports **Python 3**.
If you followed previous steps to use virtualenv to install tensorflow,
you can just activate the virtualenv and use the following command to install AutoKeras. 
```
pip install git+https://github.com/keras-team/keras-tuner.git
pip install autokeras
```

If you did not use virtualenv, and you use `python3` command to execute your python program,
please use the following command to install AutoKeras.
```
python3 -m pip install git+https://github.com/keras-team/keras-tuner.git
python3 -m pip install autokeras
```


================================================
FILE: docs/templates/stylesheets/extra.css
================================================
/* Set the primary and accent foreground color variables to red (#d00000).
   The --md- prefix suggests these are read by the mkdocs-material theme. */
:root>* {
  --md-primary-fg-color: #d00000;
  --md-accent-fg-color: #d00000;
}

================================================
FILE: docs/templates/tutorial/faq.md
================================================
## How to resume a previously killed run?
This feature is controlled by the `overwrite` argument of `AutoModel` or any other task APIs.
It is set to `False` by default,
which means it would not overwrite the contents of the directory.
In other words, it will continue the previous fit.

You can just run the same code again.
It will automatically resume the previously killed run.

## How to customize metrics and loss?
Please see the code example below.

```python
import autokeras as ak


clf = ak.ImageClassifier(
    max_trials=3,
    metrics=['mse'],
    loss='mse',
)
```

## How to use customized metrics to select the best model?
By default, AutoKeras uses validation loss as the metric for selecting the best model.
Below is a code example of using customized metric for selecting models.
Please read the comments for the details.

```python
# Implement your customized metric according to the tutorial.
# https://keras.io/api/metrics/#creating-custom-metrics
import keras_tuner

import autokeras as ak


def f1_score(y_true, y_pred):
  ...

clf = ak.ImageClassifier(
    max_trials=3,

    # Wrap the metric name into a Keras Tuner Objective
    # and pass it to AutoKeras.

    # Direction can be 'min' or 'max',
    # meaning we want to minimize or maximize the metric.

    # 'val_f1_score' is the function name (or the metric name)
    # with a 'val_' prefix added.

    objective=keras_tuner.Objective('val_f1_score', direction='max'),
    # Include it as one of the metrics.
    metrics=[f1_score],
)
```

================================================
FILE: docs/templates/tutorial/overview.md
================================================
# AutoKeras 1.0 Tutorial

## Supported Tasks

AutoKeras supports several tasks with an extremely simple interface.
You can click the links below to see the detailed tutorial for each task.

**Supported Tasks**:

[Image Classification](/tutorial/image_classification)

[Image Regression](/tutorial/image_regression)

[Text Classification](/tutorial/text_classification)

[Text Regression](/tutorial/text_regression)

[Structured Data Classification](/tutorial/structured_data_classification)

[Structured Data Regression](/tutorial/structured_data_regression)

## Multi-Task and Multi-Modal Data

If you are dealing with multi-task or multi-modal dataset, you can refer to this
[tutorial](/tutorial/multi) for details.


## Customized Model

Follow this [tutorial](/tutorial/customized), to use AutoKeras building blocks to quickly construct your own model.
With these blocks, you only need to specify the high-level architecture of your model.
AutoKeras would search for the best detailed configuration for you.
Moreover, you can override the base classes to create your own block.
The following are the links to the documentation of the predefined input nodes and blocks in AutoKeras.

**Nodes**:

[ImageInput](/node/#imageinput-class)

[Input](/node/#input-class)

[TextInput](/node/#textinput-class)

[StructuredDataInput](/node/#structureddatainput-class)

**Blocks**:

[ImageAugmentation](/block/#imageaugmentation-class)

[Normalization](/block/#normalization-class)

[ConvBlock](/block/#convblock-class)

[DenseBlock](/block/#denseblock-class)

[Embedding](/block/#embedding-class)

[Merge](/block/#merge-class)

[ResNetBlock](/block/#resnetblock-class)

[RNNBlock](/block/#rnnblock-class)

[SpatialReduction](/block/#spatialreduction-class)

[TemporalReduction](/block/#temporalreduction-class)

[XceptionBlock](/block/#xceptionblock-class)

[ImageBlock](/block/#imageblock-class)

[TextBlock](/block/#textblock-class)

[StructuredDataBlock](/block/#structureddatablock-class)

[ClassificationHead](/block/#classificationhead-class)

[RegressionHead](/block/#regressionhead-class)

## Export Model
You can follow this [tutorial](/tutorial/export) to export the best model.


================================================
FILE: docs/tutobooks.py
================================================
"""Keras tutobooks implementation.

A tutobook is a tutorial available simultaneously as a notebook,
as a Python script, and as a nicely rendered webpage.

Its source-of-truth (for manual edition and version control) is
its Python script form, but you can also create one by starting
from a notebook and converting it with the command `nb2py`.

Text cells are stored in markdown-formatted comment blocks.
the first line (starting with " * 3) may optionally contain a special
annotation, one of:

- invisible: do not render this block.
- shell: execute this block while prefixing each line with `!`.

The script form should start with a header with the following fields:
Title:
Author: (could be `Authors`: as well, and may contain markdown links)
Date created: (date in yyyy/mm/dd format)
Last modified: (date in yyyy/mm/dd format)
Description: (one-line text description)

## How to add a new code example to Keras.io

You would typically start from an existing notebook.

Save it to disk (let's say as `path_to_your_nb.ipynb`).
`cd` to the `keras-io/scripts/` directory.

Then run:

```
python tutobooks nb2py path_to_your_nb.ipynb ../examples/your_example.py
```

This will create the file `examples/your_example.py`. Open it,
fill in the headers, and generally edit it so that it looks nice.

NOTE THAT THE CONVERSION SCRIPT MAY MAKE MISTAKES IN ITS ATTEMPTS
TO SHORTEN LINES. MAKE SURE TO PROOFREAD THE GENERATED .py IN FULL.
Or alternatively, make sure to keep your lines reasonably-sized (<90 char)
to start with, so that the script won't have to shorten them.

You can then preview what it looks like when converted back again
to ipynb by running:

```
python tutobooks py2nb ../examples/your_example.py preview.ipynb
```

NOTE THAT THIS COMMAND WILL ERROR OUT IF ANY CELLS TAKES TOO LONG
TO EXECUTE. In that case, make your code lighter/faster.
Remember that examples are meant to demonstrate workflows, not
train state-of-the-art models. They should
stay very lightweight.

Open the generated `preview.ipynb` and make sure it looks like what
you expect. If not, keep editing `your_example.py` until it does.

Finally, submit a PR adding `examples/your_example.py`.
"""

import copy
import json
import os
import random
import shutil
import sys
from pathlib import Path

# Value passed as --ExecutePreprocessor.timeout to `jupyter nbconvert` when
# py_to_nb executes a generated notebook (60 * 60 = 3600).
TIMEOUT = 60 * 60
# py_to_nb raises a ValueError when the code cells of a script add up to more
# than this many lines, as counted by _count_locs.
MAX_LOC = 300


def nb_to_py(nb_path, py_path):
    """Convert a Jupyter notebook into a tutobook Python script.

    The generated script starts with a placeholder header (every field set to
    FILLME). Markdown cells become triple-quoted text blocks, code cells whose
    first character is `!` become shell-tagged text blocks, and all other code
    cells are copied as plain code. Cells of any other type are dropped.

    The script is written to `py_path`, formatted by running the `black`
    command on it, and finally rewritten with the result of `_shorten_lines`.
    If `_shorten_lines` raises, the black-formatted content is still written
    back before the exception propagates.

    Args:
        nb_path: Path of the notebook (JSON) file to read.
        py_path: Path of the Python script to write. It is appended unquoted
            to the `black` shell command, so it should not contain spaces or
            shell metacharacters.
    """
    # Notebooks are JSON documents; read them as UTF-8 and close the handle.
    with open(nb_path, encoding="utf-8") as f:
        nb = json.load(f)
    py = '"""\n'
    py += "Title: FILLME\n"
    py += "Author: FILLME\n"
    py += "Date created: FILLME\n"
    py += "Last modified: FILLME\n"
    py += "Description: FILLME\n"
    py += '"""\n'
    for cell in nb["cells"]:
        if cell["cell_type"] == "code":
            # Is it a shell cell?
            if (
                cell["source"]
                and cell["source"][0]
                and cell["source"][0][0] == "!"
            ):
                # It's a shell cell
                py += '"""shell\n'
                py += "".join(cell["source"]) + "\n"
                py += '"""\n\n'
            else:
                # It's a Python cell
                py += "".join(cell["source"]) + "\n\n"
        elif cell["cell_type"] == "markdown":
            py += '"""\n'
            py += "".join(cell["source"]) + "\n"
            py += '"""\n\n'
    # Save file
    with open(py_path, "w", encoding="utf-8") as f:
        f.write(py)
    # Format file with Black
    os.system("black " + str(py_path))
    # Shorten lines
    with open(py_path, encoding="utf-8") as f:
        py = f.read()
    try:
        py = _shorten_lines(py)
    finally:
        # Always write `py` back: the shortened text on success, otherwise the
        # black-formatted text that was just read.
        with open(py_path, "w", encoding="utf-8") as f:
            f.write(py)


def py_to_nb(py_path, nb_path, fill_outputs=True):
    """Convert a tutobook Python script into a Jupyter notebook.

    The script is consumed element by element with
    `_get_next_script_element`. Elements made only of empty lines are skipped,
    elements tagged `shell` become code cells whose lines are prefixed with
    `!`, and elements tagged `invisible` are left out of the notebook.

    Args:
        py_path: Path of the Python script to read. The notebook name is the
            last "/"-separated component with its final three characters
            (normally ".py") removed.
        nb_path: Path of the notebook file to write.
        fill_outputs: If true, execute the notebook in place with
            `jupyter nbconvert`, then remove every file or folder that
            appeared in the notebook's directory during the execution.

    Raises:
        ValueError: If the code cells contain more than `MAX_LOC` lines as
            counted by `_count_locs`. Nothing is written in that case.
    """
    with open(py_path, encoding="utf-8") as f:
        py = f.read()
    # validate(py)

    # header, _, py, tag = _get_next_script_element(py)
    # attributes = _parse_header(header)
    cells = []
    loc = 0
    # Write first header cell
    # header_cell = {
    # "cell_type": "markdown",
    # "source": [
    # "# " + attributes["title"] + "\n",
    # "\n",
    # "**" + attributes["auth_field"] + ":** " + attributes["author"] +"<br>\n",
    # "**Date created:** " + attributes["date_created"] + "<br>\n",
    # "**Last modified:** " + attributes["last_modified"] + "<br>\n",
    # "**Description:** " + attributes["description"],
    # ],
    # "metadata": {"colab_type": "text"},
    # }
    # cells.append(header_cell)
    while py:
        e, cell_type, py, tag = _get_next_script_element(py)
        lines = e.split("\n")

        if all(line == "" for line in lines):
            continue

        # Drop a leading empty line.
        if lines and not lines[0]:
            lines = lines[1:]
        source = [line + "\n" for line in lines]
        # Drop a trailing whitespace-only line.
        if source and not source[-1].strip():
            source = source[:-1]
        if tag == "shell":
            source = ["!" + line for line in source]
            cell_type = "code"
        if tag != "invisible" and source:
            cell = {"cell_type": cell_type, "source": source}
            if cell_type == "code":
                cell["outputs"] = []
                cell["metadata"] = {"colab_type": "code"}
                cell["execution_count"] = 0
                loc += _count_locs(source)
            else:
                cell["metadata"] = {"colab_type": "text"}
            cells.append(cell)
    # Deep-copy the template: a shallow copy would share the nested
    # "metadata" dict, so setting the name below would modify NB_BASE itself.
    notebook = {key: copy.deepcopy(value) for key, value in NB_BASE.items()}
    notebook["metadata"]["colab"]["name"] = str(py_path).split("/")[-1][:-3]
    notebook["cells"] = cells
    if loc > MAX_LOC:
        raise ValueError(
            "Found %d lines of code, but expected fewer than %d"
            % (loc, MAX_LOC)
        )

    with open(nb_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(notebook, indent=1, sort_keys=True))
    if fill_outputs:
        print("Generating ipynb")
        parent_dir = Path(nb_path).parent
        # Snapshot the directory so that anything created while the notebook
        # runs can be removed afterwards.
        current_files = set(os.listdir(parent_dir))
        try:
            os.system(
                "jupyter nbconvert --to notebook --execute --debug "
                + str(nb_path)
                + " --inplace"
                + " --ExecutePreprocessor.timeout="
                + str(TIMEOUT)
            )
        finally:
            for fname in os.listdir(parent_dir):
                if fname in current_files:
                    continue
                fpath = parent_dir / fname
                if os.path.isdir(fpath):
                    print("Removing created folder:", fname)
                    shutil.rmtree(fpath)
                else:
                    print("Removing created file:", fname)
                    os.remove(fpath)


def nb_to_md(nb_path, md_path, img_dir, working_dir=None):
    """Convert an already populated notebook into a markdown page.

    The notebook is copied into a working directory and converted with
    `jupyter nbconvert --to markdown`. Images extracted by nbconvert are
    copied to `<img_dir>/<md_name>/`, image links in the markdown are
    rewritten to point there, indented output is wrapped in code blocks and
    the result is written to `md_path`.

    Args:
        nb_path: Path of the source notebook.
        md_path: Path of the markdown file to write; must end with `.md`.
        img_dir: Directory under which a per-page image folder is created.
        working_dir: Scratch directory for the conversion. When `None`, a
            randomly named `tmp_*` directory is created in the current
            directory and removed afterwards.
    """
    img_exts = ("png", "jpg", "jpeg")
    # Assumes an already populated notebook.
    assert str(md_path).endswith(".md")
    current_dir = os.getcwd()
    original_img_dir = str(img_dir)
    if original_img_dir.endswith("/"):
        original_img_dir = original_img_dir[:-1]
    img_dir = os.path.abspath(img_dir)
    nb_path = os.path.abspath(nb_path)
    nb_fname = str(nb_path).split("/")[-1]

    del_working_dir = False
    if working_dir is None:
        del_working_dir = True
        # Integer bounds: `random.randint` rejects floats on Python >= 3.12.
        working_dir = "tmp_" + str(random.randint(10**6, 10**7))
    os.makedirs(working_dir, exist_ok=True)
    print("Using working_dir:", working_dir)

    md_name = str(md_path).split("/")[-1][:-3]
    target_md = md_name + ".md"
    # `img_dir` is absolute at this point, so it is independent of the cwd.
    img_dir = Path(img_dir) / md_name
    os.makedirs(img_dir, exist_ok=True)

    try:
        os.chdir(working_dir)
        try:
            shutil.copyfile(nb_path, nb_fname)
            os.system(
                # "jupyter nbconvert --to markdown --execute --debug "
                "jupyter nbconvert --to markdown "
                + nb_fname
                + " --output "
                + target_md
                # + " --ExecutePreprocessor.timeout="
                # + str(TIMEOUT)
            )
            tmp_img_dir = md_name + "_files"
            if os.path.exists(tmp_img_dir):
                for fname in os.listdir(tmp_img_dir):
                    if fname.endswith(img_exts):
                        src = Path(tmp_img_dir) / fname
                        target = Path(img_dir) / fname
                        print("copy", src, "to", target)
                        shutil.copyfile(src, target)
        finally:
            # Always return to the caller's directory, even on failure.
            os.chdir(current_dir)

        with open(Path(working_dir) / target_md) as f:
            md_content = f.read()
        for ext in img_exts:
            md_content = md_content.replace(
                "![" + ext + "](" + md_name + "_files",
                "![" + ext + "](" + original_img_dir + "/" + md_name,
            )
        md_content = _make_output_code_blocks(md_content)
        with open(md_path, "w") as f:
            f.write(md_content)
    finally:
        # Only remove the scratch directory when this function created it.
        if del_working_dir:
            shutil.rmtree(working_dir)


def py_to_md(py_path, nb_path, md_path, img_dir, working_dir=None):
    """Render a tutobook script as markdown via an intermediate notebook.

    The notebook is first written to `nb_path` with `fill_outputs=False`,
    then converted to the markdown file at `md_path`.
    """
    # Step 1: script -> notebook, leaving the cell outputs empty.
    py_to_nb(py_path, nb_path, fill_outputs=False)
    # Step 2: notebook -> markdown (images go under `img_dir`).
    nb_to_md(nb_path, md_path, img_dir, working_dir)


def validate(py):
    """Validate the format of a tutobook script.

    Specifically:
        - validate headers
        - validate style with black

    Args:
        py: Full source text of the script.

    Raises:
        ValueError: If a header field is missing or malformed, if a
            single-line triple-quote-fenced comment or a trailing space is
            found, or if `black` would reformat the script.
    """
    lines = py.split("\n")
    # Pad the header so that a truncated script reports the missing field
    # with a ValueError instead of failing with an IndexError.
    header = lines[:6] + [""] * (6 - len(lines))
    if not header[0].startswith('"""'):
        raise ValueError('Missing `"""`-fenced header at top of script.')
    if not header[1].startswith("Title: "):
        raise ValueError("Missing `Title:` field.")
    if not header[2].startswith("Author: ") and not header[2].startswith(
        "Authors: "
    ):
        raise ValueError("Missing `Author:` field.")
    if not header[3].startswith("Date created: "):
        raise ValueError("Missing `Date created:` field.")
    if not header[4].startswith("Last modified: "):
        raise ValueError("Missing `Last modified:` field.")
    if not header[5].startswith("Description: "):
        raise ValueError("Missing `Description:` field.")
    description = header[5][len("Description: ") :]
    if not description:
        raise ValueError("Missing `Description:` field content.")
    if not description[0] == description[0].upper():
        raise ValueError("Description field content must be capitalized.")
    if not description[-1] == ".":
        raise ValueError("Description field content must end with a period.")
    if len(description) > 100:
        raise ValueError(
            "Description field content must be less than 100 chars."
        )
    for i, line in enumerate(lines):
        if line.startswith('"""') and line.endswith('"""') and len(line) > 3:
            raise ValueError(
                'Do not use single line `"""`-fenced comments. '
                "Encountered at line %d" % (i,)
            )
    for i, line in enumerate(lines):
        if line.endswith(" "):
            raise ValueError(
                "Found trailing space on line %d; line: `%s`" % (i, line)
            )
    # Validate style with black: format a temporary copy and compare.
    # Integer bounds: `random.randint` rejects floats on Python >= 3.12.
    fpath = "/tmp/" + str(random.randint(10**6, 10**7)) + ".py"
    pre_formatting = "\n".join(lines)
    with open(fpath, "w") as f:
        f.write(pre_formatting)
    try:
        os.system("black " + fpath)
        with open(fpath) as f:
            formatted = f.read()
    finally:
        # Never leave the temporary copy behind.
        os.remove(fpath)
    if formatted != pre_formatting:
        raise ValueError(
            "You python file did not follow `black` conventions. "
            "Run `black your_file.py` to autoformat it."
        )


def _count_locs(lines):
    loc = 0
    string_open = False
    for line in lines:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if not string_open:
            if not line.startswith('"""'):
                loc += 1
            else:
                if not line.endswith('"""'):
                    string_open = True
        else:
            if line.startswith('"""'):
                string_open = False
    return loc


def _shorten_lines(py):
    max_len = 90
    lines = []
    for line in py.split("\n"):
        if len(line) <= max_len:
            lines.append(line)
            continue
        i = 0
        while len(line) > max_len:
            line = line.lstrip()
            if " " not in line[1:]:
                lines.append(line)
                break
            else:
                short_line = line[:max_len]
                line = line[max_len:]
                if " " in short_line:
                    reversed_short_line = short_line[::-1]
                    index = reversed_short_line.find(" ") + 1
                    line = short_line[-index:] + line
                    short_line = short_line[:-index]

                lines.append(short_line.lstrip())
            i += 1
            if i > 10:
                raise
        lines.append(line.lstrip())
    return "\n".join(lines)


def _get_next_script_element(py):
    lines = py.split("\n")
    assert lines
    elines = []
    i = 0
    tag = None
    if lines[0].startswith('"""'):
        assert len(lines) >= 2
        etype = "markdown"
        if len(lines[0]) > 3:
            tag = lines[0][3:]
            if tag not in ["shell", "invisible"]:
                raise ValueError("Found unknown cell tag:", tag)
        lines = lines[1:]
    else:
        etype = "code"

    for i, line in enumerate(lines):
        if line.startswith('"""'):
            break
        else:
            elines.append(line)

    if etype == "markdown":
        py = "\n".join(lines[i + 1 :])
    else:
        py = "\n".join(lines[i:])
    e = "\n".join(elines)

    return e, etype, py, tag


def _parse_header(header):
    lines = header.split("\n")
    title = lines[0][len("Title: ") :]
    author_line = lines[1]
    if author_line.startswith("Authors"):
        author = author_line[len("Authors: ") :]
        auth_field = "Authors"
    else:
        author = author_line[len("Author: ") :]
        auth_field = "Author"
    date_created = lines[2][len("Date created: ") :]
    last_modified = lines[3][len("Last modified: ") :]
    description = lines[4][len("Description: ") :]
    return {
        "title": title,
        "author": author,
        "auth_field": auth_field,
        "date_created": date_created,
        "last_modified": last_modified,
        "description": description,
    }


def _make_output_code_blocks(md):
    lines = md.split("\n")
    output_lines = []
    final_lines = []
    is_inside_backticks = False

    def is_output_line(line, prev_line, output_lines):
        if line.startswith("    ") and len(line) >= 5:
            if output_lines or (lines[i - 1].strip() == "" and line.strip()):
                return True
        return False

    def flush(output_lines, final_lines):
        final_lines.append('<div class="k-default-codeblock">')
        final_lines.append("```")
        if len(output_lines) == 1:
            line = output_lines[0]
            final_lines.append(line[4:])
        else:
            for line in output_lines:
                final_lines.append(line[4:])
        final_lines.append("```")
        final_lines.append("</div>")

    for i, line in enumerate(lines):
        if line.startswith("```"):
            is_inside_backticks = not is_inside_backticks
            final_lines.append(line)
            continue

        if is_inside_backticks:
            final_lines.append(line)
            continue

        if i > 0 and is_output_line(line, lines[-1], output_lines):
            output_lines.append(line)
        elif not line:
            if output_lines:
                if output_lines[-1]:
                    output_lines.append(line)
            else:
                final_lines.append(line)
        else:
            if output_lines:
                flush(output_lines, final_lines)
                output_lines = []
            final_lines.append(line)
    if output_lines:
        flush(output_lines, final_lines)
    return "\n".join(final_lines)


# Skeleton shared by every generated notebook: Colab settings, the Python 3
# kernel description and the notebook format version. `py_to_nb` copies the
# top-level entries into each notebook and then fills in the Colab name and
# the cells. The copy is shallow, so the nested "metadata" dict is the same
# object for every notebook built in one process.
NB_BASE = {
    "metadata": {
        "colab": {
            "collapsed_sections": [],
            "name": "",  # Set by `py_to_nb` from the script file name.
            "private_outputs": False,
            "provenance": [],
            "toc_visible": True,
        },
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {
            "codemirror_mode": {"name": "ipython", "version": 3},
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.7.0",
        },
    },
    "nbformat": 4,
    "nbformat_minor": 0,
}


if __name__ == "__main__":
    # Command line entry point:
    #   nb2py source_filename.ipynb target_filename.py
    #   py2nb source_filename.py target_filename.ipynb
    # Check the argument count before indexing so that a bare invocation
    # reports the usage instead of failing with an IndexError.
    if len(sys.argv) < 2 or sys.argv[1] not in {"nb2py", "py2nb"}:
        raise ValueError(
            "Specify a command: either "
            "`nb2py source_filename.ipynb target_filename.py` or "
            "`py2nb source_filename.py target_filename.ipynb`"
        )
    cmd = sys.argv[1]
    if len(sys.argv) < 4:
        raise ValueError("Specify a source filename and a target filename")
    source = sys.argv[2]
    target = sys.argv[3]

    if cmd == "py2nb":
        if not source.endswith(".py"):
            raise ValueError(
                "The source filename should be a Python file. Got:", source
            )
        if not target.endswith(".ipynb"):
            raise ValueError(
                "The target filename should be a notebook file. Got:", target
            )
        py_to_nb(source, target)
    elif cmd == "nb2py":
        if not source.endswith(".ipynb"):
            raise ValueError(
                "The source filename should be a notebook file. Got:", source
            )
        if not target.endswith(".py"):
            raise ValueError(
                "The target filename should be a Python file. Got:", target
            )
        nb_to_py(source, target)


================================================
FILE: examples/automodel_with_cnn.py
================================================
# Library import
import keras
import numpy as np

import autokeras as ak

# Prepare example Data - Shape 1D
num_instances = 100
num_features = 5
x_train = np.random.rand(num_instances, num_features).astype(np.float32)
y_train = np.zeros(num_instances).astype(np.float32)
y_train[0 : int(num_instances / 2)] = 1
x_test = np.random.rand(num_instances, num_features).astype(np.float32)
y_test = np.zeros(num_instances).astype(np.float32)
# Label the first half of the test set as positive, like the training set.
y_test[0 : int(num_instances / 2)] = 1

# This step is very important: the CNN only accepts data with a trailing
# channel axis, so both splits are reshaped to (instances, features, 1).
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
print(x_train.shape)
print(y_train.shape)


# Prepare Automodel for search
input_node = ak.Input()
output_node = ak.ConvBlock()(input_node)
# output_node = ak.DenseBlock()(output_node) #optional
# output_node = ak.SpatialReduction()(output_node) #optional
output_node = ak.ClassificationHead(num_classes=2, multi_label=True)(
    output_node
)

auto_model = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=1
)


# Search
auto_model.fit(x_train, y_train, epochs=1)
print(auto_model.evaluate(x_test, y_test))


# Export as a Keras Model
model = auto_model.export_model()
# `summary()` prints the table itself; wrapping it in print(type(...)) only
# added a spurious NoneType line.
model.summary()

# print model as image
keras.utils.plot_model(
    model, show_shapes=True, expand_nested=True, to_file="name.png"
)


================================================
FILE: examples/celeb_age.py
================================================
"""
Regression tasks estimate a numeric variable, such as the price of a house or
voter turnout.

This example is adapted from a
[notebook](https://gist.github.com/mapmeld/98d1e9839f2d1f9c4ee197953661ed07)
which estimates a person's age from their image, trained on the
[IMDB-WIKI](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/) photographs
of famous
people.

First, prepare your image data in a numpy.ndarray.
Each image must have the same shape, meaning each has the same width, height,
and color channels as other images in the set.
"""

from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
from google.colab import drive
from PIL import Image
from scipy.io import loadmat

import autokeras as ak

"""
### Connect your Google Drive for Data
"""


drive.mount("/content/drive")

"""
### Install AutoKeras

Download the master branch to your Google Drive for this tutorial. In general,
you can use *pip install autokeras* .
"""

"""shell
!pip install  -v "/content/drive/My Drive/AutoKeras-dev/autokeras-master.zip"
!pip uninstall keras-tuner
!pip install
git+git://github.com/keras-team/keras-tuner.git@d2d69cba21a0b482a85ce2a38893e2322e139c01
"""

"""shell
!pip install torch
"""

"""
### **Import IMDB Celeb images and metadata**
"""

"""shell
!mkdir "./drive/My Drive/mlin/celebs"
"""

"""shell
! wget -O "./drive/My Drive/mlin/celebs/imdb_0.tar"
https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/imdb_0.tar
"""

"""shell
! cd "./drive/My Drive/mlin/celebs" && tar -xf imdb_0.tar
! rm "./drive/My Drive/mlin/celebs/imdb_0.tar"
"""

"""
Uncomment and run the cell below if you are re-running the notebook and do not
need to install everything again from the beginning.
"""

# ! cd ./drive/My\ Drive/mlin/celebs.

"""shell
! ls "./drive/My Drive/mlin/celebs/imdb/"
"""

"""shell
! wget https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/static/imdb_meta.tar
! tar -xf imdb_meta.tar
! rm imdb_meta.tar
"""

"""
### **Converting from MATLAB date to actual Date-of-Birth**
"""


def datenum_to_datetime(datenum):
    """
    Translate a Matlab datenum into a Python datetime.

    The integer part selects the calendar day and the fractional part the
    time of day. 366 days are subtracted from the result (presumably the
    offset between the Matlab and Python day numbering). If the conversion
    fails, the result for datenum 700000 is returned instead.
    """
    day_fraction = datenum % 1
    hour_value = day_fraction % 1 * 24
    minute_value = hour_value % 1 * 60
    second_value = minute_value % 1 * 60
    try:
        calendar_day = datetime.fromordinal(int(datenum))
        time_of_day = timedelta(
            days=int(day_fraction),
            hours=int(hour_value),
            minutes=int(minute_value),
            seconds=round(second_value),
        )
        return calendar_day + time_of_day - timedelta(days=366)
    except Exception:
        # Fall back to a fixed date for values that cannot be converted.
        return datenum_to_datetime(700000)


# Quick check: convert one sample datenum and print the resulting datetime.
print(datenum_to_datetime(734963))

"""
### **Opening MatLab file to Pandas DataFrame**
"""


# Load the metadata file and turn its "imdb" structure into a DataFrame.
x = loadmat("imdb/imdb.mat")


mdata = x["imdb"]  # variable in mat file
mdtype = mdata.dtype  # dtypes of structures are "unsized objects"
# Map every field name of the structure to the value stored at [0, 0].
ndata = {n: mdata[n][0, 0] for n in mdtype.names}
columns = [n for n, v in ndata.items()]

# Build one row per record by walking the first 10 fields column by column.
rows = []
for col in range(0, 10):
    values = list(ndata.items())[col]
    for num, val in enumerate(values[1][0], start=0):
        if col == 0:
            # First field: create an (initially empty) row for every record.
            rows.append([])
        if num > 0:
            # Record 0 is never filled; rows[0] stays empty and is dropped
            # below through `rows[1:]`.
            if columns[col] == "dob":
                rows[num].append(datenum_to_datetime(int(val)))
            elif columns[col] == "photo_taken":
                # Only the year is available; 30 June is used as the date.
                rows[num].append(datetime(year=int(val), month=6, day=30))
            else:
                rows[num].append(val)

# Convert each remaining row to a numpy array (lazily, through `map`).
dt = map(lambda row: np.array(row), np.array(rows[1:]))

df = pd.DataFrame(data=dt, index=range(0, len(rows) - 1), columns=columns)
print(df.head())

print(columns)
print(df["full_path"])

"""
### **Calculating age at time photo was taken**
"""

# Age = (photo date - date of birth), cast to an integer and divided by
# 31558102e9. NOTE(review): presumably the difference is in nanoseconds and
# the divisor is the number of nanoseconds per year - confirm.
df["age"] = (df["photo_taken"] - df["dob"]).astype("int") / 31558102e9
print(df["age"])

"""
### **Creating dataset**


* We sample 200 of the images which were included in this first download.
* Images are resized to 128x128 to standardize shape and conserve memory
* RGB images are converted to grayscale to standardize shape
* Ages are converted to ints


"""


def df2numpy(train_set):
    """Load the images and ages of a DataFrame subset as numpy arrays.

    Every image referenced in the "full_path" column is opened, resized to
    128x128 and converted to mode "L" before being stored as int32.

    Returns:
        A tuple `(image_inputs, ages)`; the ages are the "age" column cast
        to int.
    """
    image_root = "./drive/My Drive/mlin/celebs/imdb/"
    images = [
        np.asarray(
            Image.open(image_root + img_path[0])
            .resize((128, 128))
            .convert("L"),
            dtype="int32",
        )
        for img_path in train_set["full_path"]
    ]
    return np.array(images), train_set["age"].astype("int").to_numpy()


# Draw 200 training and 100 test samples from the rows whose "full_path"
# compares below "02". Both draws come from the same pool, so the two sets
# may share images.
train_set = df[df["full_path"] < "02"].sample(200)
train_imgs, train_ages = df2numpy(train_set)

test_set = df[df["full_path"] < "02"].sample(100)
test_imgs, test_ages = df2numpy(test_set)

"""
### **Training using AutoKeras**
"""


# Initialize the image regressor
reg = ak.ImageRegressor(max_trials=15)  # AutoKeras tries 15 different models.

# Find the best model for the given training data
reg.fit(train_imgs, train_ages)

# Predict with the chosen model:
# predict_y = reg.predict(test_imgs)  # Uncomment if required

# Evaluate the chosen model with the held-out testing data (the training
# data was evaluated here before, which does not measure generalization).
print(reg.evaluate(test_imgs, test_ages))

"""
### **Validation Data**

By default, AutoKeras uses the last 20% of training data as validation data. As
shown in the example below, you can use validation_split to specify the
percentage.
"""

reg.fit(
    train_imgs,
    train_ages,
    # Split the training data and use the last 15% as validation data.
    validation_split=0.15,
    epochs=3,
)

"""
You can also use your own validation set instead of splitting it from the
training data with validation_data.
"""

# Hold out the last 15% of the sampled images as the validation set. A fixed
# index larger than the dataset would leave the validation arrays empty.
split = int(len(train_imgs) * 0.85)
x_val = train_imgs[split:]
y_val = train_ages[split:]
x_train = train_imgs[:split]
y_train = train_ages[:split]
reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=3,
)

"""
### **Customized Search Space**

For advanced users, you may customize your search space by using AutoModel
instead of ImageRegressor. You can configure the ImageBlock for some high-level
configurations, e.g., block_type for the type of neural network to search,
normalize for whether to do data normalization, augment for whether to do data
augmentation. You can also choose not to specify these arguments, which would
leave the different choices to be tuned automatically. See the following
example for detail.
"""


input_node = ak.ImageInput()
output_node = ak.ImageBlock(
    # Only search ResNet architectures.
    block_type="resnet",
    # Normalize the dataset.
    normalize=True,
    # Do not do data augmentation.
    augment=False,
)(input_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10)
reg.fit(x_train, y_train, epochs=3)

"""
The usage of AutoModel is similar to the functional API of Keras. Basically, you
are building a graph, whose edges are blocks and the nodes are intermediate
outputs of blocks. To add an edge from input_node to output_node, write
output_node = ak.some_block(input_node). You can also use more fine-grained
blocks to customize the search space even further. See the following
example.
"""


input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
output_node = ak.ImageAugmentation(translation_factor=0.3)(output_node)
output_node = ak.ResNetBlock(version="v2")(output_node)
output_node = ak.RegressionHead()(output_node)
clf = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10)
clf.fit(x_train, y_train, epochs=3)

"""
## References

[Main Reference
Notebook](https://gist.github.com/mapmeld/98d1e9839f2d1f9c4ee197953661ed07),
[Dataset](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/),
[ImageRegressor](/image_regressor),
[ResNetBlock](/block/#resnetblock-class),
[ImageInput](/node/#imageinput-class),
[AutoModel](/auto_model/#automodel-class),
[ImageBlock](/block/#imageblock-class),
[Normalization](/preprocessor/#normalization-class),
[ImageAugmentation](/preprocessor/#image-augmentation-class),
[RegressionHead](/head/#regressionhead-class).

"""


================================================
FILE: examples/cifar10.py
================================================
from keras.datasets import cifar10

import autokeras as ak

# Load CIFAR-10 as a (train, test) pair of (images, labels) tuples.
train_data, test_data = cifar10.load_data()
x_train, y_train = train_data
x_test, y_test = test_data

# Create the image classifier with max_trials=3.
clf = ak.ImageClassifier(max_trials=3)
# Run the model search on the training split with epochs=5.
clf.fit(x_train, y_train, epochs=5)
# Report the second value returned by `evaluate` on the testing split.
test_metrics = clf.evaluate(x_test, y_test)
print(f"Accuracy: {test_metrics[1]}")


================================================
FILE: examples/imdb.py
================================================
"""
Search for a good model for the
[IMDB](
https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) dataset.
"""

import keras
import numpy as np

import autokeras as ak


def imdb_raw():
    """Load the IMDB reviews dataset as raw text.

    The word-index sequences returned by `keras.datasets.imdb.load_data` are
    decoded back into space-separated words, with "<PAD>", "<START>" and
    "<UNK>" standing for the reserved ids 0, 1 and 2.

    Returns:
        `(x_train, y_train), (x_test, y_test)`, where the `x` arrays hold one
        string per review and the `y` arrays are reshaped to `(-1, 1)`.
    """
    max_features = 20000
    index_offset = 3  # word index offset

    (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
        num_words=max_features, index_from=index_offset
    )
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    word_to_id = keras.datasets.imdb.get_word_index()
    word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value: key for key, value in word_to_id.items()}

    def decode(sentences):
        # Turn every sequence of word ids back into a sentence.
        return [
            " ".join(id_to_word[i] for i in sentence) for sentence in sentences
        ]

    # `np.str` was removed in NumPy 1.24; the builtin `str` is equivalent.
    x_train = np.array(decode(x_train), dtype=str)
    x_test = np.array(decode(x_test), dtype=str)
    return (x_train, y_train), (x_test, y_test)


# Prepare the data.
(x_train, y_train), (x_test, y_test) = imdb_raw()
print(x_train.shape)  # (25000,)
print(y_train.shape)  # (25000, 1)
print(x_train[0][:50])  # <START> this film was just brilliant casting <UNK>

# Initialize the TextClassifier
clf = ak.TextClassifier(max_trials=3)
# Search for the best model.
clf.fit(x_train, y_train, epochs=2, batch_size=8)
# Evaluate on the testing data. `evaluate` returns [loss, accuracy]; print
# only the accuracy, as the cifar10 example does.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))


================================================
FILE: examples/mnist.py
================================================
"""
Search for a good model for the
[MNIST](https://keras.io/datasets/#mnist-database-of-handwritten-digits)
dataset.
"""

from keras.datasets import mnist

import autokeras as ak

# Prepare the dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

# Initialize the ImageClassifier.
clf = ak.ImageClassifier(max_trials=3)
# Search for the best model.
clf.fit(x_train, y_train, epochs=10)
# Evaluate on the testing data. `evaluate` returns [loss, accuracy]; print
# only the accuracy, as the cifar10 example does.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))


================================================
FILE: examples/new_pop.py
================================================
"""shell
pip install autokeras
"""

import pandas as pd

import autokeras as ak

"""
## Social Media Articles Example

Regression tasks estimate a numeric variable, such as the price of a house
or a person's age.

This example estimates the view counts for an article on social media platforms,
trained on a
[News Popularity](
https://archive.ics.uci.edu/ml/datasets/
News+Popularity+in+Multiple+Social+Media+Platforms)
dataset collected from 2015-2016.

First, prepare your text data in a `numpy.ndarray` format.
"""


# Read the CSV with pandas, then convert the needed columns to numpy arrays.
df = pd.read_csv("./News_Final.csv")

text_inputs = df["Title"].to_numpy(dtype="str")
media_success_outputs = df["Facebook"].to_numpy(dtype="int")

"""
Next, initialize and train the [TextRegressor](/text_regressor).
"""


# Create the text regressor; max_trials=15 lets AutoKeras try 15 models.
reg = ak.TextRegressor(max_trials=15)

# Search for the best model on the training data.
reg.fit(text_inputs, media_success_outputs)

# Use the chosen model to predict on the same inputs.
predict_y = reg.predict(text_inputs)


================================================
FILE: examples/reuters.py
================================================
"""shell
!pip install -q -U pip
!pip install -q -U autokeras==1.0.8
!pip install -q git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
"""

import keras
import numpy as np
from keras.datasets import reuters

import autokeras as ak

"""
Search for a good model for the
[Reuters](https://keras.io/ja/datasets/#_5) dataset.
"""


# Prepare the dataset.
def reuters_raw(max_features=20000):
    """Load the Reuters newswire dataset as raw text.

    The word-index sequences returned by `reuters.load_data` are decoded
    back into space-separated words, with "<PAD>", "<START>" and "<UNK>"
    standing for the reserved ids 0, 1 and 2.

    Args:
        max_features: Passed to `reuters.load_data` as `num_words`.

    Returns:
        `(x_train, y_train), (x_test, y_test)`, where the `x` arrays hold one
        string per newswire and the `y` arrays are reshaped to `(-1, 1)`.
    """
    index_offset = 3  # word index offset

    (x_train, y_train), (x_test, y_test) = reuters.load_data(
        num_words=max_features, index_from=index_offset
    )
    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    word_to_id = reuters.get_word_index()
    word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()}
    word_to_id["<PAD>"] = 0
    word_to_id["<START>"] = 1
    word_to_id["<UNK>"] = 2

    id_to_word = {value: key for key, value in word_to_id.items()}

    def decode(sentences):
        # Turn every sequence of word ids back into a sentence.
        return [
            " ".join(id_to_word[i] for i in sentence) for sentence in sentences
        ]

    # `np.str` was removed in NumPy 1.24; the builtin `str` is equivalent.
    x_train = np.array(decode(x_train), dtype=str)
    x_test = np.array(decode(x_test), dtype=str)
    return (x_train, y_train), (x_test, y_test)


# Prepare the data.
(x_train, y_train), (x_test, y_test) = reuters_raw()
print(x_train.shape)  # (8982,)
print(y_train.shape)  # (8982, 1)
print(x_train[0][:50])  # <START> <UNK> <UNK> said as a result of its decemb

# Initialize the TextClassifier
clf = ak.TextClassifier(
    max_trials=5,
    overwrite=True,
)

# Callback to avoid overfitting with the EarlyStopping.
cbs = [
    keras.callbacks.EarlyStopping(patience=3),
]

# Search for the best model. The keyword is `callbacks` (plural); the
# singular spelling is not a fit argument.
clf.fit(x_train, y_train, epochs=10, callbacks=cbs)

# Evaluate on the testing data. `evaluate` returns [loss, accuracy]; print
# only the accuracy, as the cifar10 example does.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "autokeras"
authors = [
    {name = "Keras team", email = "keras-users@googlegroups.com"},
]
description = "AutoML for deep learning"
readme = "README.md"
requires-python = ">=3.8"
license = {text = "Apache License 2.0"}
dynamic = ["version"]
classifiers = [
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Software Development :: Libraries",
]
dependencies = [
    "packaging",
    "keras-tuner>=1.4.0",
    "keras>=3.0.0",
    "dm-tree",
]

[project.optional-dependencies]
tests = [
    "pandas",
    "pytest>=4.4.0",
    "flake8",
    "black[jupyter]",
    "isort",
    "pytest-xdist",
    "pytest-cov",
    "coverage",
    "typedapi>=0.2,<0.3",
    "scikit-learn",
]

[project.urls]
Home = "https://autokeras.com/"
Repository = "https://github.com/keras-team/autokeras"

[tool.setuptools.dynamic]
version = {attr = "autokeras.__init__.__version__"}

[tool.setuptools.packages.find]
include = ["autokeras", "autokeras.*"]

[tool.black]
line-length = 80
target-version = []

[tool.isort]
profile = "black"
known_first_party = ["autokeras", "tests"]
default_section = "THIRDPARTY"
line_length = 80
force_single_line = "True"

================================================
FILE: setup.cfg
================================================
[tool:pytest]
addopts=-v
        -p no:warnings
        --durations=10
        --log-cli-level=CRITICAL

# Do not run tests in the build folder
norecursedirs= build

[coverage:report]
exclude_lines =
    pragma: no cover
    @abstract
    raise NotImplementedError
omit = 
    *test*
    autokeras/prototype/*

[flake8]

# imported but unused in __init__.py, that's ok.
per-file-ignores = **/__init__.py:F401
ignore = E203, W503
max-line-length = 80


================================================
FILE: shell/contributors.py
================================================
import base64
import json
import os
import sys
from io import BytesIO

import requests
from PIL import Image


def main(directory):
    """Print an SVG grid of contributor avatars to stdout.

    Reads `contributors.json` from the current working directory and keeps
    the entries whose `type` is "User", except the `codacy-badger` account.
    Each remaining contributor's avatar is downloaded into `directory`,
    resized, re-encoded as PNG and embedded base64-encoded in an `<image>`
    element that links to the contributor's `html_url`.

    Args:
        directory: Path of an existing directory in which the downloaded
            avatars are stored as `<index>.jpeg`.
    """
    # Use a context manager so the JSON file handle is not leaked.
    with open("contributors.json") as json_file:
        entries = json.load(json_file)

    contributors = []
    for contributor in entries:
        if contributor["type"] != "User":
            continue
        if contributor["login"] == "codacy-badger":
            continue
        contributors.append(contributor)

    size = 36
    gap = 3
    elem_per_line = 22
    width = elem_per_line * (size + gap) + gap
    # Number of rows (ceiling division) times the cell size, plus one gap.
    height = ((len(contributors) - 1) // elem_per_line + 1) * (size + gap) + gap

    html = '<svg xmlns="http://www.w3.org/2000/svg" '
    html += 'xmlns:xlink="http://www.w3.org/1999/xlink" '
    html += 'width="{width}" height="{height}">'.format(
        width=width, height=height
    )

    # The clip shape is derived from `size` so the avatar dimensions are
    # defined in a single place; a radius of half the size gives a circle.
    defs = "<defs>"
    defs += '<rect id="rect" width="{size}" height="{size}" rx="{rx}"/>'.format(
        size=size, rx=size // 2
    )
    defs += '<clipPath id="clip"> <use xlink:href="#rect"/> </clipPath> '
    defs += "</defs>"

    html += defs + "\n"

    for index, contributor in enumerate(contributors):
        file_name = os.path.join(directory, str(index) + ".jpeg")
        response = requests.get(contributor["avatar_url"])
        # The context manager closes the file even if the write fails.
        with open(file_name, "wb") as avatar_file:
            avatar_file.write(response.content)
        # resize() returns a new image, so the source file can be closed
        # as soon as the resized copy exists.
        with Image.open(file_name) as original:
            image = original.resize((size, size))
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("UTF-8")

        # Column and row of this avatar in the grid.
        xi = index % elem_per_line
        yi = index // elem_per_line
        x = xi * (size + gap) + gap
        y = yi * (size + gap) + gap
        temp = (
            '<a xlink:href="{html_url}"> '
            + '<image transform="translate({x},{y})" '
            + 'xlink:href="data:image/png;base64,{img_str}" '
            + 'alt="{login}" clip-path="url(#clip)" '
            + 'width="{size}" height="{size}"/></a>'
        )
        temp = temp.format(
            html_url=contributor["html_url"],
            x=x,
            y=y,
            img_str=img_str,
            login=contributor["login"],
            size=size,
        )
        html += temp + "\n"

    html += "</svg>"
    print(html)


# Script entry point: the first command-line argument is passed to main()
# as the directory for the downloaded avatars.
if __name__ == "__main__":
    main(sys.argv[1])


================================================
FILE: shell/contributors.sh
================================================
# Regenerate docs/templates/img/contributors.svg from the GitHub contributor list.

# Stop at the first failing step instead of continuing with incomplete data.
set -e
# Remove the temporary files on every exit path, including failures.
trap 'rm -rf response.json contributors.json avatars' EXIT

# node shell/generate_json.js
gh api -H "Accept: application/vnd.github+json" /repos/keras-team/autokeras/contributors --paginate > response.json
# Join back-to-back JSON arrays ("][") in the response into a single array.
sed "s/\]\[/,/g" response.json > contributors.json
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p avatars
python shell/contributors.py avatars > docs/templates/img/contributors.svg


================================================
FILE: shell/copyright.txt
================================================
# Copyright 2020 The AutoKeras Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: shell/cov.sh
================================================
# Run pytest on `tests`, measuring coverage of `autokeras`; writes an HTML
# report and an XML report to cov.xml.
pytest tests --cov-report html --cov-report xml:cov.xml --cov=autokeras


================================================
FILE: shell/coverage.sh
================================================
# Run pytest with coverage of `autokeras`, writing an XML report to cov.xml.
# Script arguments (e.g. a test path) are forwarded to pytest; "$@" keeps
# each argument intact and expands to nothing when none is given.
pytest --cov-report xml:cov.xml --cov autokeras "$@"

================================================
FILE: shell/docs.sh
================================================
#!/usr/bin/env bash
# Build the documentation site and force-push it to the gh-pages branch.

# Abort if the docs cannot be built: continuing would run the git steps
# below from the wrong directory or publish a stale or missing docs/site.
cd docs || exit 1
python autogen.py || exit 1
mkdocs build || exit 1
cd .. || exit 1
sh shell/format.sh
echo "autokeras.com" > docs/site/CNAME
# Commit the built site on a throwaway branch, split docs/site out into a
# gh-pages branch, publish it, then remove both temporary branches.
git checkout -b gh-pages-temp
git add -f docs/site
git commit -m "gh-pages update"
git subtree split --prefix docs/site -b gh-pages
git push -f origin gh-pages:gh-pages
git branch -D gh-pages
git checkout master
git branch -D gh-pages-temp


================================================
FILE: shell/format.sh
================================================
# Sort imports and reformat the Python sources in place.
isort .
black .

# Prepend the license header to every source file that does not mention
# "Copyright". Reading find's output line by line and quoting "$i" keeps
# paths with spaces or glob characters intact.
find autokeras benchmark -name '*.py' | while IFS= read -r i
do
  if ! grep -q Copyright "$i"
  then
    echo "$i"
    cat shell/copyright.txt "$i" >"$i.new" && mv "$i.new" "$i"
  fi
done

flake8 .


================================================
FILE: shell/generate_json.js
================================================
// Write the contributors of keras-team/autokeras to contributors.json.
const { Octokit } = require("@octokit/rest");
const fs = require('fs')

const client = new Octokit({});
const repository = {
    owner: 'keras-team',
    repo: 'autokeras',
};

// Serialize whatever paginate() resolves with as a single JSON document.
const saveContributors = (people) => {
    fs.writeFileSync('contributors.json', JSON.stringify(people))
};

client.paginate(client.repos.listContributors, repository).then(saveContributors);


================================================
FILE: shell/lint.sh
================================================
# Run every style check; exit with status 1 at the first one that fails.

# Import ordering.
if ! isort -c .
then
  echo "Please run \"sh shell/format.sh\" to format the code."
  exit 1
fi
# Static style checks.
if ! flake8 .
then
  echo "Please fix the code style issue."
  exit 1
fi
# Code formatting.
if ! black --check .
then
  echo "Please run \"sh shell/format.sh\" to format the code."
  exit 1
fi
# Every source file must mention "Copyright". The loop runs in a pipeline
# subshell, so its `exit 1` is propagated by the trailing `|| exit 1`.
# Quoting "$i" keeps paths with spaces or glob characters intact.
find autokeras benchmark -name '*.py' | while IFS= read -r i
do
  if ! grep -q Copyright "$i"
  then
    echo "Please run \"sh shell/format.sh\" to format the code."
    exit 1
  fi
done || exit 1


================================================
FILE: shell/perf.sh
================================================
# Run tests/performance.py and copy pytest's output into perf_output.txt.
# NOTE(review): without pipefail the pipeline's exit status is tee's, so a
# pytest failure is not reflected in this script's exit code.
pytest tests/performance.py | tee perf_output.txt


================================================
FILE: shell/pre-commit.sh
================================================
#!/usr/bin/env bash
# Build the image defined by docker/pre-commit.Dockerfile and run it against
# the current directory.

# Stop at the first failing command.
set -e

export DOCKER_BUILDKIT=1
docker build -t autokeras_formatting -f docker/pre-commit.Dockerfile .
# Mount the physical current directory at /autokeras inside the container;
# --rm removes the container once it exits.
docker run --rm -t -v "$(pwd -P):/autokeras" autokeras_formatting


================================================
FILE: shell/pypi.sh
================================================
#!/usr/bin/env bash
# Build fresh distributions and upload them to PyPI.

# Stop on the first error so a failed build is never followed by an upload.
set -e
# -f: a missing or empty dist/ is not an error (e.g. on a clean checkout).
rm -f dist/*
python -m build
twine upload --repository-url https://upload.pypi.org/legacy/ dist/*