Repository: uber/bayesmark
Branch: master
Commit: 8c420e935718
Files: 102
Total size: 525.6 KB

Directory structure:
gitextract_vn3wrx6j/

├── .coveragerc
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .secrets.baseline
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── bayesmark/
│   ├── __init__.py
│   ├── abstract_optimizer.py
│   ├── builtin_opt/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── hyperopt_optimizer.py
│   │   ├── nevergrad_optimizer.py
│   │   ├── opentuner_optimizer.py
│   │   ├── pysot_optimizer.py
│   │   ├── random_optimizer.py
│   │   └── scikit_optimizer.py
│   ├── cmd_parse.py
│   ├── constants.py
│   ├── data.py
│   ├── expected_max.py
│   ├── experiment.py
│   ├── experiment_aggregate.py
│   ├── experiment_analysis.py
│   ├── experiment_baseline.py
│   ├── experiment_db_init.py
│   ├── experiment_launcher.py
│   ├── np_util.py
│   ├── path_util.py
│   ├── quantiles.py
│   ├── random_search.py
│   ├── serialize.py
│   ├── signatures.py
│   ├── sklearn_funcs.py
│   ├── space.py
│   ├── stats.py
│   ├── util.py
│   └── xr_util.py
├── build_wheel.sh
├── docs/
│   ├── .gitignore
│   ├── Makefile
│   ├── authors.rst
│   ├── code.rst
│   ├── conf.py
│   ├── dummy.py
│   ├── index.rst
│   ├── readme.rst
│   └── scoring.rst
├── example_opt_root/
│   ├── config.json
│   ├── flaky_optimizer.py
│   ├── hyperopt_optimizer.py
│   ├── nevergrad_optimizer.py
│   ├── opentuner_optimizer.py
│   ├── pysot_optimizer.py
│   ├── random_optimizer.py
│   └── scikit_optimizer.py
├── integration_test.sh
├── integration_test_with_setup.sh
├── notebooks/
│   ├── dummy.py
│   ├── plot_mean_score.ipynb
│   └── plot_test_case.ipynb
├── requirements/
│   ├── base.in
│   ├── base.txt
│   ├── docs.in
│   ├── docs.txt
│   ├── ipynb.in
│   ├── ipynb.txt
│   ├── optimizers.in
│   ├── optimizers.txt
│   ├── pipreqs_edits.sed
│   ├── self.txt
│   ├── test.in
│   ├── test.txt
│   ├── tools.in
│   └── tools.txt
├── setup.py
├── test/
│   ├── data_test.py
│   ├── dummy.py
│   ├── expected_max_test.py
│   ├── experiment_aggregate_test.py
│   ├── experiment_analysis_test.py
│   ├── experiment_baseline_test.py
│   ├── experiment_db_init_test.py
│   ├── experiment_launcher_test.py
│   ├── experiment_test.py
│   ├── hypothesis_util.py
│   ├── np_util_test.py
│   ├── quantiles_test.py
│   ├── random_search_test.py
│   ├── serialize_test.py
│   ├── signatures_test.py
│   ├── sklearn_funcs_test.py
│   ├── space_test.py
│   ├── stats_test.py
│   ├── util.py
│   ├── util_test.py
│   └── xr_util_test.py
├── test.sh
└── tools/
    ├── archive_branch.sh
    └── deploy.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .coveragerc
================================================
[report]
exclude_lines =
    pragma: no cover
    @abstract
    ValueError
    NotImplementedError
    assert
    _error
    def main()
    pragma: io
    pragma: main
    pragma: validator


================================================
FILE: .gitignore
================================================
.*
!.gitignore
!.gitmodules
!.flake8
!.coveragerc
!.pre-commit-config.yaml
!.secrets.baseline
!.travis.yml
!.readthedocs.yml

# For wheels
bayesmark/version.py
dist/

# Java
*.class

# Intellij
*.iml
*.iws

# Gradle
build/
classes/

log/
tmp/
/out/
ins.xml
*.log

# Python
*.py[co]
*.egg*
.cache
.DS_Store

# env
env/

# Emacs
*~
.\#*
\#*\#

# *ipynb
.ipynb_checkpoints
*.png
*.aux

# Hypothesis
tests/src
src/

# Coverage
htmlcov/

# for the test.sh pip compile check
requirements/*.chk
requirement_chk.in


================================================
FILE: .pre-commit-config.yaml
================================================
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v1.2.3
    hooks:
    -   id: flake8
        exclude: ^(docs/*)
        args: [--max-line-length=120, --ignore=E203]
    -   id: check-byte-order-marker
    -   id: check-case-conflict
    -   id: check-merge-conflict
    -   id: end-of-file-fixer
    -   id: forbid-new-submodules
    -   id: mixed-line-ending
        args: [--fix=lf]
    -   id: trailing-whitespace
    -   id: debug-statements
    -   id: check-json
    -   id: pretty-format-json
        args: [--autofix, --indent=4]
    -   id: check-yaml
    -   id: sort-simple-yaml
-   repo: https://github.com/ambv/black
    rev: 19.3b0
    hooks:
    -   id: black
        args: [-l 120, --target-version=py36]
-   repo: https://github.com/asottile/seed-isort-config
    rev: v1.2.0
    hooks:
    -   id: seed-isort-config
        args: [--application-directories=test]
-   repo: https://github.com/pre-commit/mirrors-isort
    rev: v4.3.4
    hooks:
    -   id: isort
        language_version: python3
        args: [-w 120, -m 3, -tc, --project=bayesmark]
- repo: https://github.com/jumanjihouse/pre-commit-hooks
  rev: 1.11.0
  hooks:
    - id: require-ascii
    - id: script-must-have-extension
    - id: forbid-binary
-   repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.1.6
    hooks:
    -   id: forbid-crlf
    -   id: forbid-tabs
- repo: https://github.com/kynan/nbstripout
  rev: fe155a55548c61e4eb53522e57921077acf82c00  # pragma: allowlist secret
  hooks:
    - id: nbstripout
      exclude: ^notebooks/.*\.out\.ipynb$
- repo: https://github.com/Yelp/detect-secrets
  rev: v0.12.5
  hooks:
    - id: detect-secrets
      args: ['--baseline', '.secrets.baseline']
- repo: https://github.com/pre-commit/pygrep-hooks
  rev: v1.4.1  # Use the ref you want to point at
  hooks:
    - id: python-no-eval
    - id: python-check-blanket-noqa
- repo: https://github.com/asottile/yesqa
  rev: v0.0.11
  hooks:
    - id: yesqa
- repo: https://github.com/myint/eradicate
  rev: 522ed7ce2da82d33b3e2331bf50d4671c5a5af9a  # pragma: allowlist secret
  hooks:
    - id: eradicate
      exclude: docs/conf.py


================================================
FILE: .readthedocs.yml
================================================
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Build documentation with MkDocs
#mkdocs:
#  configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
  version: 3.6
  install:
    - requirements: requirements/docs.txt


================================================
FILE: .secrets.baseline
================================================
{
  "exclude": {
    "files": null,
    "lines": null
  },
  "generated_at": "2019-09-18T01:04:54Z",
  "plugins_used": [
    {
      "name": "AWSKeyDetector"
    },
    {
      "name": "ArtifactoryDetector"
    },
    {
      "base64_limit": 4.5,
      "name": "Base64HighEntropyString"
    },
    {
      "name": "BasicAuthDetector"
    },
    {
      "hex_limit": 3,
      "name": "HexHighEntropyString"
    },
    {
      "name": "KeywordDetector"
    },
    {
      "name": "PrivateKeyDetector"
    },
    {
      "name": "SlackDetector"
    },
    {
      "name": "StripeDetector"
    }
  ],
  "results": {},
  "version": "0.12.5"
}


================================================
FILE: .travis.yml
================================================
language: python
python:
  - "3.6"

before_script:
    - "curl -H 'Cache-Control: no-cache' https://raw.githubusercontent.com/fossas/fossa-cli/master/install.sh | sudo bash"

script:
    - ./integration_test_with_setup.sh
    - ./test.sh
    - cat requirements/*.txt >requirements.txt
    - '[ ! -z "$FOSSA_API_KEY" ] && (fossa init && fossa analyze) || true'


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
include requirements/base.in
include requirements/optimizers.in
include requirements/ipynb.in
include LICENSE
include README.rst


================================================
FILE: README.rst
================================================
Installation
============

This project provides a benchmark framework to easily compare Bayesian optimization methods on real machine learning tasks.

This project is experimental and the APIs are not considered stable.

This Bayesian optimization (BO) benchmark framework requires a few easy steps for setup. It can either run on a local machine (in serial) or prepare a *commands file* to run on a cluster as parallel experiments (dry run mode).

Only ``Python>=3.6`` is officially supported, but older versions of Python likely work as well.

The core package itself can be installed with:

.. code-block:: bash

   pip install bayesmark

However, to also require installation of all the "built in" optimizers for evaluation, run:

.. code-block:: bash

   pip install bayesmark[optimizers]

It is also possible to use the same pinned dependencies we used in testing by `installing from the repo <#install-in-editable-mode>`_.

Building an environment to run the included notebooks can be done with:

.. code-block:: bash

   pip install bayesmark[notebooks]

Or, ``bayesmark[optimizers,notebooks]`` can be used.

A quick example of running the benchmark is `here <#example>`_. The instructions are used to generate results as below:

.. image:: https://user-images.githubusercontent.com/28273671/66338456-02516b80-e8f6-11e9-8156-2e84e04cf6fe.png
    :width: 95 %

Non-pip dependencies
--------------------

To be able to install ``opentuner`` some system level (non-pip) dependencies must be installed. This can be done with:

.. code-block:: bash

   sudo apt-get install libsqlite3-0
   sudo apt-get install libsqlite3-dev

On Ubuntu, this results in:

.. code-block:: console

   > dpkg -l | grep libsqlite
   ii  libsqlite3-0:amd64    3.11.0-1ubuntu1  amd64  SQLite 3 shared library
   ii  libsqlite3-dev:amd64  3.11.0-1ubuntu1  amd64  SQLite 3 development files

The environment should now be all set up to run the BO benchmark.

Running
=======

Now we can run each step of the experiments. First, we run all combinations and then run some quick commands to analyze the output.

Launch the experiments
----------------------

The experiments are run using the experiment launcher, which has the following interface:

.. code-block::

   usage: bayesmark-launch [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] [-u UUID]
                     [-dr DATA_ROOT] [-b DB] [-o OPTIMIZER [OPTIMIZER ...]]
                     [-d DATA [DATA ...]]
                     [-c [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...]]
                     [-m [{acc,mae,mse,nll} ...]] [-n N_CALLS]
                     [-p N_SUGGEST] [-r N_REPEAT] [-nj N_JOBS] [-ofile JOBS_FILE]

The arguments are:

.. code-block::

     -h, --help            show this help message and exit
     -dir DB_ROOT, -db-root DB_ROOT
                           root directory for all benchmark experiments output
     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT
                           Directory with optimization wrappers
     -v, --verbose         print the study logs to console
     -u UUID, --uuid UUID  length 32 hex UUID for this experiment
     -dr DATA_ROOT, --data-root DATA_ROOT
                           root directory for all custom csv files
     -b DB, --db DB        database ID of this benchmark experiment
     -o OPTIMIZER [OPTIMIZER ...], --opt OPTIMIZER [OPTIMIZER ...]
                           optimizers to use
     -d DATA [DATA ...], --data DATA [DATA ...]
                           data sets to use
     -c, --classifier [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...]
                           classifiers to use
     -m, --metric [{acc,mae,mse,nll} ...]
                           scoring metric to use
     -n N_CALLS, --calls N_CALLS
                           number of function evaluations
     -p N_SUGGEST, --suggestions N_SUGGEST
                           number of suggestions to provide in parallel
     -r N_REPEAT, --repeat N_REPEAT
                           number of repetitions of each study
     -nj N_JOBS, --num-jobs N_JOBS
                           number of jobs to put in the dry run file, the default
                           0 value disables dry run (real run)
     -ofile JOBS_FILE, --jobs-file JOBS_FILE
                           a jobs file with all commands to be run

The output files will be placed in ``[DB_ROOT]/[DBID]``. If ``DBID`` is not specified, it will be a randomly created subdirectory with a new name to avoid overwriting previous experiments. The path to ``DBID`` is shown at the beginning of ``stdout`` when running ``bayesmark-launch``. In general, let the launcher create and setup ``DBID`` unless you are appending to a previous experiment, in which case, specify the existing ``DBID``.

The launcher's sequence of commands can be accessed programmatically via :func:`.experiment_launcher.gen_commands`. The individual experiments can be launched programmatically via :func:`.experiment.run_sklearn_study`.

Selecting the experiments
^^^^^^^^^^^^^^^^^^^^^^^^^

Lists of optimizers, classifiers, data sets, and metrics can be specified using the ``-o``/``-c``/``-d``/``-m`` arguments, respectively. If not specified, the program launches all possible options.

Selecting the optimizer
^^^^^^^^^^^^^^^^^^^^^^^

A few different open source optimizers have been included as an example and are considered the "built-in" optimizers. The original repos are shown in the `Links <#links>`_.

The optimizer argument ``-o`` allows a list containing the "built-in" optimizers:

.. code-block::

   "HyperOpt", "Nevergrad-OnePlusOne", "OpenTuner-BanditA", "OpenTuner-GA", "OpenTuner-GA-DE", "PySOT", "RandomSearch", "Scikit-GBRT-Hedge", "Scikit-GP-Hedge", "Scikit-GP-LCB"

or, one can specify a user-defined optimizer. The class containing an optimizer conforming to the API must be found in the folder specified by ``--opt-root``. Additionally, a configuration defining each optimizer must be defined in ``[OPT_ROOT]/config.json``. The ``--opt-root`` and ``config.json`` may be omitted if only built-in optimizers are used.

Additional details for providing a new optimizer are found in `adding a new optimizer <#adding-a-new-optimizer>`_.

Selecting the data set
^^^^^^^^^^^^^^^^^^^^^^

By default, this benchmark uses the `sklearn example data sets <https://scikit-learn.org/stable/datasets/index.html#toy-datasets>`_ as the "built-in" data sets for use in ML model tuning problems.

The data argument ``-d`` allows a list containing the "built-in" data sets:

.. code-block::

   "breast", "digits", "iris", "wine", "boston", "diabetes"

or, it can refer to a custom ``csv`` file, which is the name of a file in the folder specified by ``--data-root``. It also follows the convention that regression data sets start with ``reg-`` and classification data sets start with ``clf-``. For example, the classification data set in ``[DATA_ROOT]/clf-foo.csv`` is specified with ``-d clf-foo``.

The ``csv`` file can be anything readable by pandas, but we assume the final column is the target and all other columns are features. The target column should be integer for classification data and float for regression. The features should be float (or ``str`` for categorical variable columns). See ``bayesmark.data.load_data`` for more information.

Dry run for cluster jobs
^^^^^^^^^^^^^^^^^^^^^^^^

It is also possible to do a "dry run" of the launcher by specifying a value for ``--num-jobs`` greater than zero. For example, if ``--num-jobs 50`` is provided, a text file listing 50 commands to run is produced, with one command (job) per line. This is useful when preparing a list of commands to run later on a cluster.

A dry run will generate a command file (e.g., ``jobs.txt``) like the following (with a meta-data header). Each line corresponds to a command that can be used as a job on a different worker:

.. code-block::

   # running: {'--uuid': None, '-db-root': '/foo', '--opt-root': '/example_opt_root', '--data-root': None, '--db': 'bo_example_folder', '--opt': ['RandomSearch', 'PySOT'], '--data': None, '--classifier': ['SVM', 'DT'], '--metric': None, '--calls': 15, '--suggestions': 1, '--repeat': 3, '--num-jobs': 50, '--jobs-file': '/jobs.txt', '--verbose': False, 'dry_run': True, 'rev': '9a14ef2', 'opt_rev': None}
   # cmd: python bayesmark-launch -n 15 -r 3 -dir foo -o RandomSearch PySOT -c SVM DT -nj 50 -b bo_example_folder
   job_e2b63a9_00 bayesmark-exp -c SVM -d diabetes -o PySOT -u 079a155f03095d2ba414a5d2cedde08c -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d boston -o RandomSearch -u 400e4c0be8295ad59db22d9b5f31d153 -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d digits -o RandomSearch -u fe73a2aa960a5e3f8d78bfc4bcf51428 -m acc -n 15 -p 1 -dir foo -b bo_example_folder
   job_e2b63a9_01 bayesmark-exp -c DT -d diabetes -o PySOT -u db1d9297948554e096006c172a0486fb -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d boston -o RandomSearch -u 7148f690ed6a543890639cc59db8320b -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d breast -o PySOT -u 72c104ba1b6d5bb8a546b0064a7c52b1 -m nll -n 15 -p 1 -dir foo -b bo_example_folder
   job_e2b63a9_02 bayesmark-exp -c SVM -d iris -o PySOT -u cc63b2c1e4315a9aac0f5f7b496bfb0f -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d breast -o RandomSearch -u aec62e1c8b5552e6b12836f0c59c1681 -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d digits -o RandomSearch -u 4d0a175d56105b6bb3055c3b62937b2d -m acc -n 15 -p 1 -dir foo -b bo_example_folder
   ...

This package does not have built in support for deploying these jobs on a cluster or cloud environment (e.g., AWS).

The UUID argument
^^^^^^^^^^^^^^^^^

The ``UUID`` is a 32-char hex string used as a master random seed which we use to draw random seeds for the experiments. If ``UUID`` is not specified a version 4 UUID is generated. The used UUID is displayed at the beginning of ``stdout``. In general, the ``UUID`` should not be specified/re-used except for debugging because it violates the assumption that the experiment UUIDs are unique.

Aggregate results
-----------------

Next, to aggregate all the experiment files into combined (json) files, we need to run the aggregation command:

.. code-block::

   usage: bayesmark-agg [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB [-rv]

The arguments are:

.. code-block::

     -h, --help            show this help message and exit
     -dir DB_ROOT, -db-root DB_ROOT
                           root directory for all benchmark experiments output
     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT
                           Directory with optimization wrappers
     -v, --verbose         print the study logs to console
     -b DB, --db DB        database ID of this benchmark experiment
     -rv, --ravel          ravel all studies to store batch suggestions as if
                           they were serial

The ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``.

The result aggregation can be done programmatically via :func:`.experiment_aggregate.concat_experiments`.

Analyze and summarize results
-----------------------------

Finally, to run a statistical analysis presenting a summary of the experiments we run

.. code-block::

   usage: bayesmark-anal [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB

The arguments are:

.. code-block::

     -h, --help            show this help message and exit
     -dir DB_ROOT, -db-root DB_ROOT
                           root directory for all benchmark experiments output
     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT
                           Directory with optimization wrappers
     -v, --verbose         print the study logs to console
     -b DB, --db DB        database ID of this benchmark experiment

The ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``.

The ``bayesmark-anal`` command looks for a ``baseline.json`` file in ``[DB_ROOT]/[DBID]/derived``, which states the best possible and random search performance. If no such file is present, ``bayesmark-anal`` automatically calls ``bayesmark-baseline`` to build it. The baselines are inferred from the random search performance in the logs. The baseline values are considered fixed (not random) quantities when ``bayesmark-anal`` builds confidence intervals. Therefore, we allow the user to leave them fixed and do not rebuild them when ``bayesmark-anal`` is called if a baselines file is already present.

The result analysis can be done programmatically via :func:`.experiment_analysis.compute_aggregates`, and the baseline computation via :func:`.experiment_baseline.compute_baseline`.

See :ref:`how-scoring-works` for more information on how the scores are computed and aggregated.

Example
-------

After finishing the setup (environment), a small-scale serial experiment can be run as follows:

.. code-block:: console

   > # setup
   > DB_ROOT=./notebooks  # path/to/where/you/put/results
   > DBID=bo_example_folder
   > mkdir $DB_ROOT
   > # experiments
   > bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT -c SVM DT -v
   Supply --uuid 3adc3182635e44ea96969d267591f034 to reproduce this run.
   Supply --dbid bo_example_folder to append to this experiment or reproduce jobs file.
   User must ensure equal reps of each optimizer for unbiased results
   -c DT -d boston -o PySOT -u a1b287b450385ad09b2abd7582f404a2 -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder
   -c DT -d boston -o PySOT -u 63746599ae3f5111a96942d930ba1898 -m mse -n 15 -p 1 -dir /notebooks -b bo_example_folder
   -c DT -d boston -o RandomSearch -u 8ba16c880ef45b27ba0909199ab7aa8a -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder
   ...
   0 failures of benchmark script after 144 studies.
   done
   > # aggregate
   > bayesmark-agg -dir $DB_ROOT -b $DBID
   > # analyze
   > bayesmark-anal -dir $DB_ROOT -b $DBID -v
   ...
   median score @ 15:
   optimizer
   PySOT_0.2.3_9b766b6           0.330404
   RandomSearch_0.0.1_9b766b6    0.961829
   mean score @ 15:
   optimizer
   PySOT_0.2.3_9b766b6           0.124262
   RandomSearch_0.0.1_9b766b6    0.256422
   normed mean score @ 15:
   optimizer
   PySOT_0.2.3_9b766b6           0.475775
   RandomSearch_0.0.1_9b766b6    0.981787
   done

The aggregate result files (i.e., ``summary.json``) will now be available in ``$DB_ROOT/$DBID/derived``. However, these results will have high variance since they come from only 3 trials of only 15 function evaluations each.

Plotting and notebooks
----------------------

Plotting the quantitative results found in ``$DB_ROOT/$DBID/derived`` can be done using the notebooks found in the ``notebooks/`` folder of the git repository. The notebook ``plot_mean_score.ipynb`` generates plots for aggregate scores averaging over all problems. The notebook ``plot_test_case.ipynb`` generates plots for each test problem.

To use the notebooks, first copy over the ``notebooks/`` folder from the git repository.

To setup the kernel for running the notebooks use:

.. code-block:: bash

   virtualenv bobm_ipynb --python=python3.6
   source ./bobm_ipynb/bin/activate
   pip install bayesmark[notebooks]
   python -m ipykernel install --name=bobm_ipynb --user

Now, the notebooks for plotting can be run with the command ``jupyter notebook`` and selecting the kernel ``bobm_ipynb``.

It is also possible to convert the notebooks to an HTML report at the command line using ``nbconvert``. For example, use the command:

.. code-block:: bash

   jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb

The output file will be in ``./notebooks/plot_mean_score.html``. Here is an example `export <https://github.com/uber/bayesmark/files/3699241/plot_mean_score.pdf>`_. See the ``nbconvert`` `documentation page <https://nbconvert.readthedocs.io/en/latest/usage.html#supported-output-formats>`_ for more output formats. By default, the notebooks look in ``./notebooks/bo_example_folder/`` for the ``summary.json`` from ``bayesmark-anal``.

To run ``plot_test_case.ipynb`` use the command:

.. code-block:: bash

   jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=600

The ``--ExecutePreprocessor.timeout=600`` timeout increase is needed due to the large number of plots being generated. The output will be in ``./notebooks/plot_test_case.html``.

Adding a new optimizer
======================

All optimizers in this benchmark are required to follow the interface specified by the ``AbstractOptimizer`` class in ``bayesmark.abstract_optimizer``. In general, this requires creating a wrapper class around the new optimizer. The wrapper classes must all be placed in a folder referred to by the ``--opt-root`` argument. This folder must also contain the ``config.json`` file.

The interface is simple: one must merely implement the ``suggest`` and ``observe`` functions. The ``suggest`` function generates new guesses for evaluating the function. Once evaluated, the function evaluations are passed to the ``observe`` function. The objective function is *not* evaluated by the optimizer class. The objective function is evaluated on the outside and the results are passed to ``observe``. This is the correct setup for Bayesian optimization because:

* We can observe/try inputs that were never suggested
* We can ignore suggestions
* The objective function may not be something as simple as a Python function

So passing the function as an argument, as is done in ``scipy.optimize``, is artificially restrictive.

The implementation of the wrapper will look like the following:

.. code-block:: python

   from bayesmark.abstract_optimizer import AbstractOptimizer
   from bayesmark.experiment import experiment_main


   class NewOptimizerName(AbstractOptimizer):
       # Used for determining the version number of package used
       primary_import = "name of import used e.g, opentuner"

       def __init__(self, api_config, optional_arg_foo=None, optional_arg_bar=None):
           """Build wrapper class to use optimizer in benchmark.

           Parameters
           ----------
           api_config : dict-like of dict-like
               Configuration of the optimization variables. See API description.
           """
           AbstractOptimizer.__init__(self, api_config)
           # Do whatever other setup is needed
           # ...

       def suggest(self, n_suggestions=1):
           """Get suggestion from the optimizer.

           Parameters
           ----------
           n_suggestions : int
               Desired number of parallel suggestions in the output

           Returns
           -------
           next_guess : list of dict
               List of `n_suggestions` suggestions to evaluate the objective
               function. Each suggestion is a dictionary where each key
               corresponds to a parameter being optimized.
           """
           # Do whatever is needed to get the parallel guesses
           # ...
           return x_guess

       def observe(self, X, y):
           """Feed an observation back.

           Parameters
           ----------
           X : list of dict-like
               Places where the objective function has already been evaluated.
               Each suggestion is a dictionary where each key corresponds to a
               parameter being optimized.
           y : array-like, shape (n,)
               Corresponding values where objective has been evaluated
           """
           # Update the model with new objective function observations
           # ...
           # No return statement needed


   if __name__ == "__main__":
       # This is the entry point for experiments, so pass the class to experiment_main to use this optimizer.
       # This statement must be included in the wrapper class file:
       experiment_main(NewOptimizerName)

Depending on the API of the optimizer being wrapped, building this wrapper class may require only a few lines of code, or be a total pain.

The config file
---------------

Note: A config file is now optional. If no ``config.json`` is provided, the experiment launcher will look for all folders with an ``optimizer.py`` in the ``--opt-root`` directory.

Each optimizer wrapper can have multiple configurations, each of which is referred to as a different optimizer in the benchmark. For example, the JSON config file will have entries as follows:

.. code-block:: json

   {
       "OpenTuner-BanditA-New": [
           "opentuner_optimizer.py",
           {"techniques": ["AUCBanditMetaTechniqueA"]}
       ],
       "OpenTuner-GA-DE-New": [
           "opentuner_optimizer.py",
           {"techniques": ["PSO_GA_DE"]}
       ],
       "OpenTuner-GA-New": [
           "opentuner_optimizer.py",
           {"techniques": ["PSO_GA_Bandit"]}
       ]
   }

Basically, the entries are ``"name_of_strategy": ["file_with_class", {kwargs_for_the_constructor}]``. Here, ``OpenTuner-BanditA-New``, ``OpenTuner-GA-DE-New``, and ``OpenTuner-GA-New`` are all treated as different optimizers by the benchmark even though they all use the same class from ``opentuner_optimizer.py``.

This ``config.json`` must be in the same folder as the optimizer classes (e.g., ``opentuner_optimizer.py``).

Running with a new optimizer
----------------------------

To run the benchmarks using a new optimizer, simply provide its name (from ``config.json``) in the ``-o`` list. The ``--opt-root`` argument must be specified in this case. For example, the launch command from the `example <#example>`_ becomes:

.. code-block:: bash

   bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New -c SVM DT --opt-root ./example_opt_root -v

Here, we are using the example ``PySOT-New`` wrapper from the ``example_opt_root`` folder in the git repo. It is equivalent to the builtin ``PySOT``, but gives an example of how to provide a new custom optimizer.

Contributing
============

The following instructions have been tested with Python 3.6.8 on Ubuntu (16.04.5 LTS).

Install in editable mode
------------------------

First, define the variables for the paths we will use:

.. code-block:: bash

   GIT=/path/to/where/you/put/repos
   ENVS=/path/to/where/you/put/virtualenvs

Then clone the repo in your git directory ``$GIT``:

.. code-block:: bash

   cd $GIT
   git clone https://github.com/uber/bayesmark.git

Inside your virtual environments folder ``$ENVS``, make the environment:

.. code-block:: bash

   cd $ENVS
   virtualenv bayesmark --python=python3.6
   source $ENVS/bayesmark/bin/activate

Now we can install the pip dependencies. Move back into your git directory and run

.. code-block:: bash

   cd $GIT/bayesmark
   pip install -r requirements/base.txt
   pip install -r requirements/optimizers.txt
   pip install -e .  # Install the benchmark itself

You may want to run ``pip install -U pip`` first if you have an old version of ``pip``. The file ``optimizers.txt`` contains the dependencies for all the optimizers used in the benchmark. The analysis and aggregation programs can be run using only the requirements in ``base.txt``.

Contributor tools
-----------------

First, we need to setup some needed tools:

.. code-block:: bash

   cd $ENVS
   virtualenv bayesmark_tools --python=python3.6
   source $ENVS/bayesmark_tools/bin/activate
   pip install -r $GIT/bayesmark/requirements/tools.txt

To install the pre-commit hooks for contributing run (in the ``bayesmark_tools`` environment):

.. code-block:: bash

   cd $GIT/bayesmark
   pre-commit install

To rebuild the requirements, we can run:

.. code-block:: bash

   cd $GIT/bayesmark
   # Get py files from notebooks to analyze
   jupyter nbconvert --to script notebooks/*.ipynb
   # Generate the .in files (but pins to latest, which we might not want)
   pipreqs bayesmark/ --ignore bayesmark/builtin_opt/ --savepath requirements/base.in
   pipreqs test/ --savepath requirements/test.in
   pipreqs bayesmark/builtin_opt/ --savepath requirements/optimizers.in
   pipreqs notebooks/ --savepath requirements/ipynb.in
   pipreqs docs/ --savepath requirements/docs.in
   # Regenerate the .txt files from .in files
   pip-compile-multi --no-upgrade

Generating the documentation
----------------------------

First setup the environment for building with ``Sphinx``:

.. code-block:: bash

   cd $ENVS
   virtualenv bayesmark_docs --python=python3.6
   source $ENVS/bayesmark_docs/bin/activate
   pip install -r $GIT/bayesmark/requirements/docs.txt

Then we can do the build:

.. code-block:: bash

   cd $GIT/bayesmark/docs
   make all
   open _build/html/index.html

Documentation will be built in all of the formats defined in the ``Makefile``. Use ``make html`` to only generate the HTML documentation.

Running the tests
-----------------

The tests for this package can be run with:

.. code-block:: bash

   cd $GIT/bayesmark
   ./test.sh

The script creates a conda environment using the requirements found in ``requirements/test.txt``.

The ``test.sh`` script *must* be run from a *clean* git repo.

Or if we only want to run the unit tests and not check the adequacy of the requirements files, one can use

.. code-block:: bash

   # Setup environment
   cd $ENVS
   virtualenv bayesmark_test --python=python3.6
   source $ENVS/bayesmark_test/bin/activate
   pip install -r $GIT/bayesmark/requirements/test.txt
   pip install -e $GIT/bayesmark
   # Now run tests
   cd $GIT/bayesmark/
   pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html

A code coverage report will also be produced in ``$GIT/bayesmark/htmlcov/index.html``.

Deployment
----------

The wheel (tar ball) for deployment as a pip installable package can be built using the script:

.. code-block:: bash

   cd $GIT/bayesmark/
   ./build_wheel.sh

Links
=====

The `source <https://github.com/uber/bayesmark>`_ is hosted on GitHub.

The `documentation <https://bayesmark.readthedocs.io/en/latest/>`_ is hosted at Read the Docs.

Installable from `PyPI <https://pypi.org/project/bayesmark/>`_.

The builtin optimizers are wrappers on the following projects:

* `HyperOpt <https://github.com/hyperopt/hyperopt>`_
* `Nevergrad <https://github.com/facebookresearch/nevergrad>`_
* `OpenTuner <https://github.com/jansel/opentuner>`_
* `PySOT <https://github.com/dme65/pySOT>`_
* `Scikit-optimize <https://github.com/scikit-optimize/scikit-optimize>`_

License
=======

This project is licensed under the Apache 2 License - see the LICENSE file for details.


================================================
FILE: bayesmark/__init__.py
================================================
# Package metadata: release version string, author name, and license identifier.
__version__ = "0.0.8"
__author__ = "Ryan Turner"
__license__ = "Apache v2"


================================================
FILE: bayesmark/abstract_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.
"""
from abc import ABC, abstractmethod

from importlib_metadata import version


class AbstractOptimizer(ABC):
    """Common interface that every optimizer wrapper in the benchmark implements.

    Sub-classes supply `suggest` and `observe`, and declare `primary_import` so that the version of the wrapped
    package can be looked up by `get_version`.
    """

    # Name of the package that identifies the wrapped optimizer, e.g., "primary_import=opentuner". It stays
    # ``None`` when a sub-class does not declare one.
    primary_import = None

    def __init__(self, api_config, **kwargs):
        """Store the configuration of the search space.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        **kwargs
            Accepted but not used by this base class.
        """
        self.api_config = api_config

    @classmethod
    def get_version(cls):
        """Report the version of the package wrapped by this optimizer.

        Returns
        -------
        version_str : str
            Installed version of the package named by `primary_import`, or the placeholder ``"x.x.x"`` when
            `primary_import` is ``None``. Usually, this is equivalent to ``package.__version__``.
        """
        package = cls.primary_import
        assert (package is None) or isinstance(package, str)
        if package is None:
            # Sub-class did not specify its primary import, so fall back to the placeholder
            return "x.x.x"
        return version(package)

    @abstractmethod
    def suggest(self, n_suggestions):
        """Propose new points at which to evaluate the objective function.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """

    @abstractmethod
    def observe(self, X, y):
        """Report objective function evaluations back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """


================================================
FILE: bayesmark/builtin_opt/__init__.py
================================================


================================================
FILE: bayesmark/builtin_opt/config.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from bayesmark.constants import RANDOM_SEARCH

# Table of the builtin optimizers. Each entry has the form
# ``"name_of_strategy": ["file_with_class", {kwargs_for_the_constructor}]``, so the same wrapper file can appear
# under several names with different constructor arguments.
CONFIG = {
    "HyperOpt": ["hyperopt_optimizer.py", {}],
    "Nevergrad-OnePlusOne": ["nevergrad_optimizer.py", {"budget": 300, "tool": "OnePlusOne"}],
    "OpenTuner-BanditA": ["opentuner_optimizer.py", {"techniques": ["AUCBanditMetaTechniqueA"]}],
    "OpenTuner-GA": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_Bandit"]}],
    "OpenTuner-GA-DE": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_DE"]}],
    "PySOT": ["pysot_optimizer.py", {}],
    "RandomSearch": ["random_optimizer.py", {}],
    "Scikit-GBRT-Hedge": [
        "scikit_optimizer.py",
        {"acq_func": "gp_hedge", "base_estimator": "GBRT", "n_initial_points": 5},
    ],
    "Scikit-GP-Hedge": ["scikit_optimizer.py", {"acq_func": "gp_hedge", "base_estimator": "GP", "n_initial_points": 5}],
    "Scikit-GP-LCB": ["scikit_optimizer.py", {"acq_func": "LCB", "base_estimator": "GP", "n_initial_points": 5}],
}

# Fail at import time if the entry named by the RANDOM_SEARCH constant is missing from the table.
assert RANDOM_SEARCH in CONFIG, "%s required in settings file." % RANDOM_SEARCH


================================================
FILE: bayesmark/builtin_opt/hyperopt_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hyperopt import hp, tpe
from hyperopt.base import JOB_STATE_DONE, JOB_STATE_NEW, STATUS_OK, Domain, Trials
from scipy.interpolate import interp1d

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.np_util import random as np_random
from bayesmark.np_util import random_seed

# Maps each API parameter type name to the builtin used to cast suggested values (see `cleanup_guess`).
# Sklearn prefers str to unicode:
DTYPE_MAP = {"real": float, "int": int, "bool": bool, "cat": str, "ordinal": str}


def dummy_f(x):
    """Placeholder objective function that must never actually be evaluated.

    Parameters
    ----------
    x : object
        Ignored.

    Raises
    ------
    AssertionError
        Always, since calling this function indicates a logic error.
    """
    # Raise explicitly instead of using `assert False`: assert statements are removed when Python runs with
    # `-O`, which would let this placeholder silently return None.
    raise AssertionError("This is a placeholder, it should never be called.")


def only(x):
    """Return the sole element of an iterable that contains exactly one item.

    The unpacking raises `ValueError` when `x` does not hold exactly one element.
    """
    [element] = x
    return element


class HyperoptOptimizer(AbstractOptimizer):
    """Wrapper that exposes hyperopt's `tpe.suggest` through the `AbstractOptimizer` interface."""

    primary_import = "hyperopt"

    def __init__(self, api_config, random=np_random):
        """Build wrapper class to use hyperopt optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random : object
            Random stream handed to `random_seed` to draw the seed for each call to `tpe.suggest`. Presumably a
            :class:`numpy:numpy.random.RandomState` -- TODO confirm against `bayesmark.np_util`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

        space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config)
        # `dummy_f` is only a placeholder objective: the objective is evaluated outside of this class
        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Some book keeping like opentuner wrapper
        # Maps the hashable form of each pending suggestion to its hyperopt trial ID
        self.trial_id_lookup = {}

        # Store just for data validation
        self.param_set_chk = frozenset(api_config.keys())

    @staticmethod
    def hashable_dict(d):
        """A custom function for hashing dictionaries.

        Parameters
        ----------
        d : dict or dict-like
            The dictionary to be converted to immutable/hashable type.

        Returns
        -------
        hashable_object : frozenset of tuple pairs
            Bijective equivalent to dict that can be hashed.
        """
        hashable_object = frozenset(d.items())
        return hashable_object

    @staticmethod
    def get_hyperopt_dimensions(api_config):
        """Help routine to setup hyperopt search space in constructor.

        Take api_config as argument so this can be static.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        Returns
        -------
        space : dict
            Maps each parameter name to the hyperopt (`hp`) expression describing its search space.
        round_to_values : dict
            Maps the name of each non-categorical parameter that has a whitelist of values to a nearest-neighbor
            interpolator (`interp1d`) that snaps a guess onto the closest allowed value.
        """
        # The ordering of iteration prob makes no difference, but just to be
        # safe and consistent with space.py, I will make sorted.
        param_list = sorted(api_config.keys())

        space = {}
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for case that whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                # np.unique sorts, so the first and last entries are the min and max of the whitelist
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values, param_values, kind="nearest", fill_value="extrapolate"
                )

            # Note that the "logit" space is handled the same way as "log" for both int and real parameters.
            if param_type == "int":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.qloguniform(param_name, np.log(low), np.log(high), 1)
                else:
                    space[param_name] = hp.quniform(param_name, low, high, 1)
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                space[param_name] = hp.choice(param_name, (False, True))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                space[param_name] = hp.choice(param_name, param_values)
            elif param_type == "real":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.loguniform(param_name, np.log(low), np.log(high))
                else:
                    space[param_name] = hp.uniform(param_name, low, high)
            else:
                assert False, "type %s not handled in API" % param_type

        return space, round_to_values

    def get_trial(self, trial_id):
        """Look up the pending trial document with the given trial ID.

        Parameters
        ----------
        trial_id : object
            Value to match against the ``"tid"`` entry of the stored trials.

        Returns
        -------
        trial : dict
            The matching trial document. It is asserted to still be in state `JOB_STATE_NEW`. An assertion
            fails if no trial matches.
        """
        # Linear scan over the trial documents held by hyperopt
        for trial in self.trials._dynamic_trials:
            if trial["tid"] == trial_id:
                assert isinstance(trial, dict)
                # Make sure right kind of dict
                assert "state" in trial and "result" in trial
                assert trial["state"] == JOB_STATE_NEW
                return trial
        assert False, "No matching trial ID"

    def cleanup_guess(self, x_guess):
        """Convert the raw values of a hyperopt trial into a suggestion in the benchmark API format.

        Parameters
        ----------
        x_guess : dict
            Maps each parameter name to a length-one sequence holding its raw value, e.g., the
            ``trial["misc"]["vals"]`` entry of a trial.

        Returns
        -------
        x_guess : dict
            New dictionary with the singletons unpacked, whitelisted values rounded to the nearest allowed value,
            and every value cast to the builtin type given by `DTYPE_MAP` for its parameter type.
        """
        assert isinstance(x_guess, dict)
        # Also, check the keys are only the vars we are searching over:
        assert frozenset(x_guess.keys()) == self.param_set_chk

        # Do the rounding
        # Make a copy to be safe, and also unpack singletons
        # We may also need to consider clip_chk at some point like opentuner
        x_guess = {k: only(x_guess[k]) for k in x_guess}
        for param_name, round_f in self.round_to_values.items():
            x_guess[param_name] = round_f(x_guess[param_name])
        # Also ensure this is correct dtype so sklearn is happy
        # NOTE(review): hyperopt may record the chosen *index* for `hp.choice` parameters in `misc["vals"]`;
        # confirm that "cat"/"ordinal" suggestions come back as actual values and not stringified indices.
        x_guess = {k: DTYPE_MAP[self.api_config[k]["type"]](x_guess[k]) for k in x_guess}
        return x_guess

    def _suggest(self):
        """Helper function to `suggest` that does the work of calling
        `hyperopt` via its dumb API.

        Returns
        -------
        new_trial : dict-like
            The single new trial document produced by `tpe.suggest`, after it has been inserted into
            `self.trials`.
        """
        new_ids = self.trials.new_trial_ids(1)
        assert len(new_ids) == 1
        self.trials.refresh()

        # Draw a fresh seed from our random stream for every call
        seed = random_seed(self.random)
        new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed)
        assert len(new_trials) == 1

        self.trials.insert_trial_docs(new_trials)
        self.trials.refresh()

        new_trial, = new_trials  # extract singleton
        return new_trial

    def suggest(self, n_suggestions=1):
        """Make `n_suggestions` suggestions for what to evaluate next.

        This requires the user observe all previous suggestions before calling
        again.

        Parameters
        ----------
        n_suggestions : int
            The number of suggestions to return.

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # Get the new trials, it seems hyperopt either uses random search or
        # guesses one at a time anyway, so we might as well call serially.
        new_trials = [self._suggest() for _ in range(n_suggestions)]

        X = []
        for trial in new_trials:
            x_guess = self.cleanup_guess(trial["misc"]["vals"])
            X.append(x_guess)

            # Build lookup to get original trial object
            # A suggestion identical to one that is still pending trips the assertion below
            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)
            assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict"
            self.trial_id_lookup[x_guess_] = trial["tid"]

        assert len(X) == n_suggestions
        return X

    def observe(self, X, y):
        """Feed the observations back to hyperopt.

        Every element of `X` must be a pending suggestion previously returned by `suggest`; anything else fails
        an assertion.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated.
        """
        assert len(X) == len(y)

        for x_guess, y_ in zip(X, y):
            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)
            assert x_guess_ in self.trial_id_lookup, "Appears to be guess that did not originate from suggest"

            # Pop so that the suggestion is no longer considered pending once observed
            trial_id = self.trial_id_lookup.pop(x_guess_)
            trial = self.get_trial(trial_id)
            assert self.cleanup_guess(trial["misc"]["vals"]) == x_guess, "trial ID not consistent with x values stored"

            # Cast to float to ensure native type
            result = {"loss": float(y_), "status": STATUS_OK}
            trial["state"] = JOB_STATE_DONE
            trial["result"] = result
        # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end
        # of loop of a bunch of evals, so we will do the same thing here.
        self.trials.refresh()


# Module-level alias for the wrapper class defined in this file.
# NOTE(review): presumably the name under which the benchmark looks up the optimizer class -- confirm against caller.
opt_wrapper = HyperoptOptimizer


================================================
FILE: bayesmark/builtin_opt/nevergrad_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import nevergrad.optimization as optimization
import numpy as np
from nevergrad import instrumentation as inst
from scipy.stats import norm

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.np_util import linear_rescale
from bayesmark.space import Real


class NevergradOptimizer(AbstractOptimizer):
    primary_import = "nevergrad"

    def __init__(self, api_config, tool, budget):
        """Wrap a nevergrad optimizer so the benchmark can drive it.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        tool : hashable
            Key used to pick the optimizer class out of
            `nevergrad.optimization.registry` (presumably the optimizer name).
        budget : int
            Expected number of max function evals
        """
        AbstractOptimizer.__init__(self, api_config)

        instrum, prewarp_spaces = NevergradOptimizer.get_nvg_dimensions(api_config)
        self.instrum = instrum
        self.space = prewarp_spaces

        n_dim = self.instrum.dimension
        optimizer_cls = optimization.registry[tool]
        self.optim = optimizer_cls(dimension=n_dim, budget=budget)

    @staticmethod
    def get_nvg_dimensions(api_config):
        """Translate `api_config` into a nevergrad search space.

        Kept static (taking `api_config` explicitly) so it can be used from
        the constructor without touching instance state.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        Returns
        -------
        instrum : nevergrad Instrumentation
            Instrumentation built from one nevergrad variable per parameter.
        all_prewarp : dict
            Maps each parameter name to the `Real` space used to warp it
            to/from the Gaussian representation, or to ``None`` when the
            parameter needs no extra warping.
        """
        variables = {}
        prewarp_spaces = {}
        # Iteration order should not matter, but sort anyway to be safe and
        # consistent with space.py.
        for name in sorted(api_config.keys()):
            config = api_config[name]

            kind = config["type"]

            space_name = config.get("space", None)
            bounds = config.get("range", None)
            values = config.get("values", None)

            warper = None
            if kind == "cat":
                assert space_name is None
                assert bounds is None
                variable = inst.var.SoftmaxCategorical(values)
            elif kind == "bool":
                assert space_name is None
                assert bounds is None
                assert values is None
                variable = inst.var.OrderedDiscrete([False, True])
            elif values is not None:
                assert kind in ("int", "ordinal", "real")
                # Information is discarded here, but OrderedDiscrete appears
                # to be invariant to monotonic transformation anyway.
                variable = inst.var.OrderedDiscrete(values)
            elif kind == "int":
                assert values is None
                # The API range is inclusive, hence the +1 on the upper end.
                # Information is discarded here too, see the note above.
                lower = int(bounds[0])
                upper = int(bounds[-1])
                variable = inst.var.OrderedDiscrete(range(lower, upper + 1))
            elif kind == "real":
                assert values is None
                assert bounds is not None
                # Optimized as a standard Gaussian; the warping to the actual
                # space is handled separately via the prewarp object.
                variable = inst.var.Gaussian(mean=0, std=1)
                warper = Real(warp=space_name, range_=bounds)
            else:
                assert False, "type %s not handled in API" % kind

            variables[name] = variable
            prewarp_spaces[name] = warper
        return inst.Instrumentation(**variables), prewarp_spaces

    def prewarp(self, xx):
        """Map a point from the original space into the representation given
        to nevergrad (standard Gaussian for the warped real parameters).

        Parameters
        ----------
        xx : dict
            Maps parameter name to a scalar value in the original space.

        Returns
        -------
        xxw : dict
            Same keys as `xx`. Parameters with a prewarp space are converted,
            all others are passed through unchanged.
        """
        warped_point = {}
        for name, value in xx.items():
            assert np.isscalar(value)
            warper = self.space[name]

            if warper is not None:
                # Step 1: warp so the variable is apriori uniform in [a, b]
                in_warped_space = warper.warp(value)
                assert in_warped_space.size == 1

                # Step 2: rescale to uniform on [0, 1], unpacking to a scalar
                (lb, ub), = warper.get_bounds()
                in_unit_interval = linear_rescale(in_warped_space.item(), lb, ub, 0, 1)

                # Step 3: push through the Gaussian quantile function so the
                # variable is apriori standard Gaussian
                value = norm.ppf(in_unit_interval)
            assert np.isscalar(value)
            warped_point[name] = value
        return warped_point

    def postwarp(self, xxw):
        """Inverse of `prewarp`: undo the Gaussian space representation.

        Parameters
        ----------
        xxw : dict
            Maps parameter name to a scalar value in nevergrad's
            representation.

        Returns
        -------
        xx : dict
            Same keys as `xxw`. Parameters with a prewarp space are converted
            back, all others are passed through unchanged.
        """
        original_point = {}
        for name, value in xxw.items():
            assert np.isscalar(value)
            warper = self.space[name]

            if warper is not None:
                # Step 1: Gaussian CDF takes the value to uniform on [0, 1]
                in_unit_interval = norm.cdf(value)

                # Step 2: rescale from [0, 1] to the warped bounds [a, b]
                (lb, ub), = warper.get_bounds()
                in_warped_space = linear_rescale(in_unit_interval, 0, 1, lb, ub)

                # Step 3: unwarp back to the original space
                value = warper.unwarp([in_warped_space])
            assert np.isscalar(value)
            original_point[name] = value
        return original_point

    def suggest(self, n_suggestions=1):
        """Ask nevergrad for the next points to evaluate.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # Collect all raw candidates first, then convert them one by one.
        raw_candidates = [self.optim.ask() for _ in range(n_suggestions)]

        next_guess = []
        for raw in raw_candidates:
            positional, keyword = self.instrum.data_to_arguments(raw)
            # The instrumentation is built from keyword variables only.
            assert positional == ()
            next_guess.append(self.postwarp(keyword))

        return next_guess

    def observe(self, X, y):
        """Report evaluated points back to nevergrad.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        for point, value in zip(X, y):
            warped = self.prewarp(point)
            data = self.instrum.arguments_to_data(**warped)
            self.optim.tell(data, value)


# Optimizer wrapper modules expose their wrapper class under the common name
# `opt_wrapper`, which is the name the experiment code imports.
opt_wrapper = NevergradOptimizer


================================================
FILE: bayesmark/builtin_opt/opentuner_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
In opentuner, many search techniques are already available. All the names of
the techniques can be found as follows:
```
>>> import opentuner
>>> techniques, generators = opentuner.search.technique.all_techniques()
>>> for t in techniques:
...     print(t.name)
```
A user can also create new search techniques
(http://opentuner.org/tutorial/techniques/).

Opentuner will create a multi-arm bandit of multiple techniques if more than
one technique is specified in `args.technique`.

Some bandits with pre-defined techniques are already registered in:
`opentuner.search.bandittechniques`

By default, we use a pre-defined bandit called `'AUCBanditMetaTechniqueA'` of 4
techniques:
```
register(AUCBanditMetaTechnique([
        differentialevolution.DifferentialEvolutionAlt(),
        evolutionarytechniques.UniformGreedyMutation(),
        evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3),
        simplextechniques.RandomNelderMead()],
        name='AUCBanditMetaTechniqueA'))
```
The other two bandits used in our experiments are: PSO_GA_DE and PSO_GA_Bandit.
Specifying a list of multiple techniques will use a multi-arm bandit over them.
"""
import warnings
from argparse import Namespace

import opentuner.tuningrunmain
from opentuner.api import TuningRunManager
from opentuner.measurement.interface import DefaultMeasurementInterface as DMI
from opentuner.resultsdb.models import DesiredResult, Result
from opentuner.search.manipulator import (
    ConfigurationManipulator,
    EnumParameter,
    FloatParameter,
    IntegerParameter,
    LogFloatParameter,
    LogIntegerParameter,
    ScaledNumericParameter,
)

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.np_util import clip_chk

# Techniques used when the caller does not pass any: the pre-defined bandit
# `AUCBanditMetaTechniqueA`.
DEFAULT_TECHNIQUES = ("AUCBanditMetaTechniqueA",)
# Database setting handed to opentuner so it uses an in-memory sqlite database.
MEMORY_ONLY_DB = "sqlite://"

# Monkey patch here! Opentuner is messed up, TuningRunMain changes global log
# settings. We should file an issue report and have them fix it. Until then,
# replace its logging setup with a no-op.
opentuner.tuningrunmain.init_logging = lambda: None


def ClippedParam(cls, epsilon=1e-5):
    """Create a subclass of an opentuner parameter class whose unscaled
    values are passed through a clip check, so parameters stay in their
    allowed range despite numerical errors.

    The input must derive from the abstract class
    `opentuner.search.manipulator.ScaledNumericParameter`.

    Parameters
    ----------
    cls : ScaledNumericParameter
        Opentuner parameter class, such as `LogFloatParameter` or
        `FloatParameter`, which transforms the domain of parameter.
    epsilon : float
        Not used by the current implementation.

    Returns
    -------
    StableClass : ScaledNumericParameter
        New class equivalent to original `cls` except that its `_unscale`
        method additionally applies `clip_chk` against the parameter's
        `min_value` and `max_value`.
    """
    is_scaled_param = issubclass(cls, ScaledNumericParameter)
    assert is_scaled_param, "this class cls should inherit from the ScaledNumericParameter class"

    class StableClass(cls):
        def _unscale(self, v):
            # Delegate to the wrapped class, then force the result back into
            # [min_value, max_value].
            return clip_chk(super()._unscale(v), self.min_value, self.max_value)

    return StableClass


class OpentunerOptimizer(AbstractOptimizer):
    """Benchmark wrapper around opentuner's `TuningRunManager` API.

    Suggestions handed out by `suggest` are remembered in `x_to_dr` until
    they are reported back through `observe`.
    """

    primary_import = "opentuner"

    def __init__(self, api_config, techniques=DEFAULT_TECHNIQUES, n_suggestions=1):
        """Build wrapper class to use opentuner optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        techniques : iterable of strings
            A list or tuple of techniques to use in opentuner. If the list
            has only one technique, then that technique will be used. If the
            list has multiple techniques a bandit over those techniques
            will be used.

        n_suggestions : int
            Default number of suggestions to be made in parallel.
        """
        AbstractOptimizer.__init__(self, api_config)

        # Opentuner requires DesiredResult to reference suggestion when making
        # its observation. x_to_dr maps the dict suggestion to DesiredResult.
        self.x_to_dr = {}
        # Keep last suggested x and repeat it whenever opentuner gives up.
        self.dummy_suggest = None

        # Setting up the arguments for opentuner. You can see all possible
        # arguments using:
        #
        #     >>> import opentuner
        #     >>> opentuner.default_argparser().parse_args(['-h'])
        #
        # We only change a few arguments (other arguments are set to defaults):
        # * database = MEMORY_ONLY_DB: to use an in-memory sqlite database
        # * parallelism = n_suggestions: num of suggestions to give in parallel
        # * technique = techniques: a list of techniques to be used by opentuner
        # * print_params = False: to avoid opentuner from exiting after printing
        #     param spaces
        args = Namespace(
            bail_threshold=500,
            database=MEMORY_ONLY_DB,
            display_frequency=10,
            generate_bandit_technique=False,
            label=None,
            list_techniques=False,
            machine_class=None,
            no_dups=False,
            parallel_compile=False,
            parallelism=n_suggestions,
            pipelining=0,
            print_params=False,
            print_search_space_size=False,
            quiet=False,
            results_log=None,
            results_log_details=None,
            seed_configuration=[],
            stop_after=None,
            technique=techniques,
            test_limit=5000,
        )

        # Setup some dummy classes required by opentuner to actually run.
        manipulator = OpentunerOptimizer.build_manipulator(api_config)
        interface = DMI(args=args, manipulator=manipulator)
        self.api = TuningRunManager(interface, args)

    @staticmethod
    def hashable_dict(d):
        """A custom function for hashing dictionaries.

        Parameters
        ----------
        d : dict or dict-like
            The dictionary to be converted to immutable/hashable type. Its
            values must themselves be hashable.

        Returns
        -------
        hashable_object : frozenset of tuple pairs
            Bijective equivalent to dict that can be hashed.
        """
        hashable_object = frozenset(d.items())
        return hashable_object

    @staticmethod
    def build_manipulator(api_config):
        """Build a ConfigurationManipulator object to be used by opentuner.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        Returns
        -------
        manipulator : ConfigurationManipulator
            Some over complexified class required by opentuner to run. It
            holds one opentuner parameter per entry of `api_config`.
        """
        manipulator = ConfigurationManipulator()

        for pname in api_config:
            ptype = api_config[pname]["type"]
            pspace = api_config[pname].get("space", None)
            pmin, pmax = api_config[pname].get("range", (None, None))

            if ptype == "real":
                if pspace in ("linear", "logit"):
                    ot_param = FloatParameter(pname, pmin, pmax)
                elif pspace in ("log", "bilog"):
                    # Log-scaled floats get the clip check to guard against
                    # numerical error pushing values outside [pmin, pmax].
                    LogFloatParameter_ = ClippedParam(LogFloatParameter)
                    ot_param = LogFloatParameter_(pname, pmin, pmax)
                else:
                    assert False, "unsupported param space = %s" % pspace
            elif ptype == "int":
                if pspace in ("linear", "logit"):
                    ot_param = IntegerParameter(pname, pmin, pmax)
                elif pspace in ("log", "bilog"):
                    ot_param = LogIntegerParameter(pname, pmin, pmax)
                else:
                    assert False, "unsupported param space = %s" % pspace
            elif ptype == "bool":
                # The actual bool parameter seems not to work in Py3 :(
                ot_param = IntegerParameter(pname, 0, 1)
            elif ptype in ("cat", "ordinal"):
                # Treat ordinal and categorical variables the same for now.
                assert "values" in api_config[pname]
                pvalues = api_config[pname]["values"]
                ot_param = EnumParameter(pname, pvalues)
            else:
                assert False, "type=%s/space=%s not handled in opentuner yet" % (ptype, pspace)
            manipulator.add_parameter(ot_param)
        return manipulator

    def suggest(self, n_suggestions=1):
        """Make `n_suggestions` suggestions for what to evaluate next.

        This requires the user observe all previous suggestions before calling
        again.

        Parameters
        ----------
        n_suggestions : int
            The number of suggestions to return.

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # Update the n_suggestions if it is different from the current setting.
        if self.api.search_driver.args.parallelism != n_suggestions:
            self.api.search_driver.args.parallelism = n_suggestions
            warnings.warn("n_suggestions changed across suggest calls")

        # Require the user to already observe all previous suggestions.
        # Otherwise, opentuner will just recycle old suggestions.
        assert len(self.x_to_dr) == 0, "all the previous suggestions should have been observed by now"

        # The real meat of suggest from opentuner: Get next `n_suggestions`
        # unique suggestions.
        desired_results = [self.api.get_next_desired_result() for _ in range(n_suggestions)]

        # Save DesiredResult object in dict since observe will need it.
        X = []
        using_dummy_suggest = False
        for ii in range(n_suggestions):
            # Opentuner can give up, but the API requires guessing forever.
            if desired_results[ii] is None:
                assert self.dummy_suggest is not None, "opentuner gave up on the first call!"
                # Use the dummy suggestion in this case.
                X.append(self.dummy_suggest)
                using_dummy_suggest = True
                continue

            # Get the simple dict equivalent to suggestion.
            x_guess = desired_results[ii].configuration.data
            X.append(x_guess)

            # Now save the desired result for future use in observe.
            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)
            assert x_guess_ not in self.x_to_dr, "the suggestions should not already be in the x_to_dr dict"
            self.x_to_dr[x_guess_] = desired_results[ii]
            # This will also catch None from opentuner.
            assert isinstance(self.x_to_dr[x_guess_], DesiredResult)

        assert len(X) == n_suggestions, "incorrect number of suggestions provided by opentuner"
        # Log suggestion for repeating if opentuner gives up next time. We can
        # only do this when it is not already being used since we will be
        # checking guesses against dummy_suggest in observe.
        if not using_dummy_suggest:
            self.dummy_suggest = X[-1]
        return X

    def observe(self, X, y):
        """Feed the observations back to opentuner.

        Observations of the dummy suggestion (repeated when opentuner gave
        up) have no DesiredResult attached and are silently dropped.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated.
        """
        assert len(X) == len(y)

        for x_guess, y_ in zip(X, y):
            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)

            # If we can't find the dr object then it must be the dummy guess.
            if x_guess_ not in self.x_to_dr:
                assert x_guess == self.dummy_suggest, "Appears to be guess that did not originate from suggest"
                continue

            # Get the corresponding DesiredResult object.
            dr = self.x_to_dr.pop(x_guess_, None)
            # This will also catch None from opentuner.
            assert isinstance(dr, DesiredResult), "DesiredResult object not available in x_to_dr"

            # Opentuner's arg names assume we are minimizing execution time.
            # So, if we want to minimize we have to pretend y is a 'time'.
            # Cast to float to ensure a native type reaches the results
            # database even when y holds non-native scalars (e.g. numpy).
            result = Result(time=float(y_))
            self.api.report_result(dr, result)


# Optimizer wrapper modules expose their wrapper class under the common name
# `opt_wrapper`, which is the name the experiment code imports.
opt_wrapper = OpentunerOptimizer


================================================
FILE: bayesmark/builtin_opt/pysot_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from copy import copy

import numpy as np
from poap.strategy import EvalRecord
from pySOT.experimental_design import SymmetricLatinHypercube
from pySOT.optimization_problems import OptimizationProblem
from pySOT.strategy import SRBFStrategy
from pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.space import JointSpace


class PySOTOptimizer(AbstractOptimizer):
    """Benchmark wrapper around pySOT's `SRBFStrategy`.

    `history` (suggested points, unwarped) and `proposals` (the matching
    pySOT proposal objects) are parallel lists: entry ``i`` of one always
    belongs to entry ``i`` of the other. `_observe` relies on this.
    """

    primary_import = "pysot"

    def __init__(self, api_config):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)

        self.space_x = JointSpace(api_config)
        self.bounds = self.space_x.get_bounds()
        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest int32, i.e., effectively no cap
        # The strategy is created lazily on the first call to suggest, since
        # the batch size is only known then.
        self.batch_size = None
        self.history = []
        self.proposals = []

    def create_opt_prob(self):
        """Create an optimization problem object.

        Reads `self.bounds` and stores the result in `self.opt`. All
        variables are declared continuous to pySOT.
        """
        opt = OptimizationProblem()
        opt.lb = self.bounds[:, 0]  # In warped space
        opt.ub = self.bounds[:, 1]  # In warped space
        opt.dim = len(self.bounds)
        opt.cont_var = np.arange(len(self.bounds))
        opt.int_var = []
        assert len(opt.cont_var) + len(opt.int_var) == opt.dim
        opt.objfun = None
        self.opt = opt

    def start(self, max_evals):
        """Starts a new pySOT run.

        Clears the pending `history`/`proposals` and builds a fresh strategy
        in `self.strategy`. Requires `self.batch_size` to be set.

        Parameters
        ----------
        max_evals : int
            Accepted for interface compatibility. The strategy is currently
            configured from `self.max_evals`, which is what all callers in
            this class pass in anyway.
        """
        self.history = []
        self.proposals = []

        # Symmetric Latin hypercube design
        des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])
        slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)

        # Warped RBF interpolant
        rbf = RBFInterpolant(
            dim=self.opt.dim,
            lb=self.opt.lb,
            ub=self.opt.ub,
            kernel=CubicKernel(),
            tail=LinearTail(self.opt.dim),
            eta=1e-4,
        )

        # Optimization strategy
        self.strategy = SRBFStrategy(
            max_evals=self.max_evals,
            opt_prob=self.opt,
            exp_design=slhd,
            surrogate=rbf,
            asynchronous=True,
            batch_size=1,
            use_restarts=True,
        )

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.

        If pySOT proposes a point that is still pending in `history`, the run
        is restarted (see `start`) and the whole batch is suggested again.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """

        if self.batch_size is None:  # First call to suggest
            self.batch_size = n_suggestions
            self.start(self.max_evals)

        # Set the tolerances pretending like we are running batch
        d, p = float(self.opt.dim), float(n_suggestions)
        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))

        # Now we can make suggestions.
        # NOTE: self.proposals must NOT be reset here. It is indexed in
        # _observe using positions found in self.history, and history can
        # still hold entries from earlier calls (suggestions whose observation
        # was non-finite and hence skipped, or that were never observed).
        # Resetting only one of the two lists would pair observations with the
        # wrong proposal. Both lists are reset together in start().
        x_w = []
        for _ in range(n_suggestions):
            proposal = self.strategy.propose_action()
            record = EvalRecord(proposal.args, status="pending")
            proposal.record = record
            proposal.accept()  # This triggers all the callbacks

            # It is possible that pySOT proposes a previously evaluated point
            # when all variables are integers, so we just abort in this case
            # since we have likely converged anyway. See PySOT issue #30.
            x = list(proposal.record.params)  # From tuple to list
            x_unwarped, = self.space_x.unwarp(x)
            if x_unwarped in self.history:
                warnings.warn("pySOT proposed the same point twice")
                self.start(self.max_evals)
                return self.suggest(n_suggestions=n_suggestions)

            # NOTE: Append unwarped to avoid rounding issues
            self.history.append(copy(x_unwarped))
            self.proposals.append(proposal)
            x_w.append(copy(x_unwarped))

        return x_w

    def _observe(self, x, y):
        """Complete the pending proposal that produced suggestion `x`.

        Parameters
        ----------
        x : dict-like
            A suggestion previously returned by `suggest`.
        y : float
            Objective value observed at `x`.

        Raises
        ------
        IndexError
            If `x` does not match any pending entry of `self.history`.
        """
        # Find the matching proposal and execute its callbacks
        idx = [x == xx for xx in self.history]
        i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
        proposal = self.proposals[i]
        proposal.record.complete(y)
        # Remove from both lists so they stay parallel.
        self.proposals.pop(i)
        self.history.pop(i)

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        assert len(X) == len(y)

        for x_, y_ in zip(X, y):
            # Just ignore, any inf observations we got, unclear if right thing.
            # The ignored suggestion stays pending in history/proposals.
            if np.isfinite(y_):
                self._observe(x_, y_)


# Optimizer wrapper modules expose their wrapper class under the common name
# `opt_wrapper`, which is the name the experiment code imports.
opt_wrapper = PySOTOptimizer


================================================
FILE: bayesmark/builtin_opt/random_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bayesmark.random_search as rs
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer


class RandomOptimizer(AbstractOptimizer):
    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, random=np_util.random):
        """Wrap the random search routine so it can be used in the benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random : object
            Random source forwarded to `suggest_dict` on every call to
            `suggest`. Defaults to `np_util.random`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

    def suggest(self, n_suggestions=1):
        """Draw random suggestions.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # The two leading arguments are always empty here (presumably the
        # observation history, which random search does not use).
        return rs.suggest_dict(
            [],
            [],
            self.api_config,
            n_suggestions=n_suggestions,
            random=self.random,
        )

    def observe(self, X, y):
        """Accept observations; they are ignored.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Random search does not learn from observations: nothing to do.
        return None


# Every optimizer wrapper module must bind its wrapper class to the name
# `opt_wrapper`: the experiment code always imports `opt_wrapper`, regardless
# of which optimizer it is loading.
opt_wrapper = RandomOptimizer


================================================
FILE: bayesmark/builtin_opt/scikit_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from scipy.interpolate import interp1d
from skopt import Optimizer as SkOpt
from skopt.space import Categorical, Integer, Real

from bayesmark.abstract_optimizer import AbstractOptimizer


class ScikitOptimizer(AbstractOptimizer):
    """Benchmark wrapper that drives a scikit-optimize (`skopt`) optimizer through ask/tell calls."""

    primary_import = "scikit-optimize"

    def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5, **kwargs):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
            Surrogate model used to estimate the objective function.
        acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
            Acquisition objective used to pick the next suggestion.
        n_initial_points : int
            Number of points to sample randomly before actual Bayes opt.
        **kwargs
            Must be empty for now; where to forward extra options is undecided.
        """
        AbstractOptimizer.__init__(self, api_config)

        sk_space, rounders = ScikitOptimizer.get_sk_dimensions(api_config)
        self.round_to_values = rounders

        # Older skopt releases drop the dimension names while normalizing the
        # space, so the names cannot be recovered from
        # self.skopt.space.dimensions. Keep our own ordered copy of them.
        self.dimensions_list = tuple(dim.name for dim in sk_space)

        # No extra keyword arguments are accepted yet.
        assert len(kwargs) == 0

        skopt_settings = dict(
            n_initial_points=n_initial_points,
            base_estimator=base_estimator,
            acq_func=acq_func,
            acq_optimizer="auto",
            acq_func_kwargs={},
            acq_optimizer_kwargs={},
        )
        self.skopt = SkOpt(sk_space, **skopt_settings)

    @staticmethod
    def get_sk_dimensions(api_config, transform="normalize"):
        """Translate `api_config` into skopt dimensions plus per-parameter rounding functions.

        This is static (it takes `api_config` explicitly) so the constructor
        can call it before the instance is fully set up.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        transform : str
            Passed as `transform` to the `Integer` and `Real` dimensions.

        Returns
        -------
        sk_dims : list
            One skopt dimension per parameter, in sorted parameter-name order.
        round_to_values : dict
            Maps the name of each non-categorical parameter that has a
            whitelist of `values` to a nearest-neighbor `interp1d` function
            that snaps a number onto that whitelist.
        """
        sk_dims = []
        round_to_values = {}
        # Iteration order should not matter, but sort to be consistent with
        # space.py.
        for name in sorted(api_config.keys()):
            config = api_config[name]

            kind = config["type"]
            warp = config.get("space", None)
            bounds = config.get("range", None)
            values = config.get("values", None)

            # A whitelist of values on a numeric parameter: search over the
            # enclosing interval and remember how to round back to the list.
            is_values_only = kind in ("cat", "ordinal")
            if not (values is None or is_values_only):
                assert bounds is None
                values = np.unique(values)
                bounds = (values[0], values[-1])
                round_to_values[name] = interp1d(values, values, kind="nearest", fill_value="extrapolate")

            if kind == "int":
                # The integer space does not support any warping, so it is
                # always left linear in skopt.
                sk_dims.append(Integer(bounds[0], bounds[-1], transform=transform, name=name))
            elif kind == "bool":
                assert bounds is None
                assert values is None
                sk_dims.append(Integer(0, 1, transform=transform, name=name))
            elif is_values_only:
                assert bounds is None
                # Keep the skopt default transform (one-hot) for categoricals
                sk_dims.append(Categorical(values, name=name))
            elif kind == "real":
                # skopt does not support all of our warpings, so pick the
                # closest substitute that it does support.
                if warp in ("log", "logit"):
                    prior = "log-uniform"
                else:
                    prior = "uniform"
                sk_dims.append(Real(bounds[0], bounds[-1], prior=prior, transform=transform, name=name))
            else:
                assert False, "type %s not handled in API" % kind
        return sk_dims, round_to_values

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # skopt hands back a list of lists, one inner list per point
        raw_points = self.skopt.ask(n_points=n_suggestions)

        next_guess = []
        for point in raw_points:
            guess = dict(zip(self.dimensions_list, point))
            # skopt has no custom rounding, so snap whitelisted parameters
            # here. Only some parameters have a rounding function.
            for name, round_f in self.round_to_values.items():
                guess[name] = round_f(guess[name])
            next_guess.append(guess)
        return next_guess

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # skopt may accept whole batches, but the interface for that is
        # unclear, so report the observations one at a time.
        for point, value in zip(X, y):
            # skopt wants a plain list ordered like the dimensions
            as_list = [point[name] for name in self.dimensions_list]
            # Non-finite observations are silently dropped; unclear whether
            # that is the right thing to do.
            if not np.isfinite(value):
                continue
            self.skopt.tell(as_list, value)


# Expose the wrapper class under the module-level name `opt_wrapper`.
opt_wrapper = ScikitOptimizer


================================================
FILE: bayesmark/cmd_parse.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Anything related to parsing command line arguments goes in here. There are some custom data structures to
represent all the options available in the experiments here.

Not currently any doc strings in this file because it may become obsolete with the use of fire package.
"""
import argparse
import json
import os.path
import sys
import uuid as pyuuid
from enum import IntEnum, auto
from pathlib import PosixPath

import git
from git.exc import InvalidGitRepositoryError
from pathvalidate.argparse import sanitize_filename, validate_filename, validate_filepath

from bayesmark.builtin_opt.config import CONFIG
from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES, OPTIMIZERS_FILE, PY_INTERPRETER
from bayesmark.path_util import absopen, abspath
from bayesmark.util import shell_join

# Fail at import time if any built-in model or data loader name contains the argument delimiter.
assert not any(ARG_DELIM in opt for opt in MODEL_NAMES)
assert not any(ARG_DELIM in opt for opt in DATA_LOADER_NAMES)


class CmdArgs(IntEnum):
    """Identifiers for every argument handled by this module.

    `CMD_STR` maps each member to its command line flags. Values are consecutive integers starting at one, in
    declaration order.
    """

    uuid = 1
    db_root = 2
    optimizer_root = 3
    data_root = 4
    db = 5
    optimizer = 6
    data = 7
    classifier = 8
    metric = 9
    n_calls = 10
    n_suggest = 11
    n_repeat = 12
    n_jobs = 13
    jobs_file = 14
    ravel = 15
    verbose = 16
    dry_run = 17
    rev = 18
    opt_rev = 19
    timeout = 20


# Maps each argument to a `(short_flag, long_name)` pair. The long name (index 1) is also used as the key when
# arguments are serialized, see `serializable_dict` and `unserializable_dict`.
# NOTE(review): the long name for `db_root` has a single leading dash ("-db-root") unlike every other long flag.
# Changing it would alter both the accepted CLI flag and the serialized key, so confirm before touching it.
CMD_STR = {
    CmdArgs.uuid: ("-u", "--uuid"),
    CmdArgs.db_root: ("-dir", "-db-root"),
    CmdArgs.optimizer_root: ("-odir", "--opt-root"),
    CmdArgs.data_root: ("-dr", "--data-root"),
    CmdArgs.db: ("-b", "--db"),
    CmdArgs.optimizer: ("-o", "--opt"),
    CmdArgs.data: ("-d", "--data"),
    CmdArgs.classifier: ("-c", "--classifier"),
    CmdArgs.metric: ("-m", "--metric"),
    CmdArgs.n_calls: ("-n", "--calls"),
    CmdArgs.n_suggest: ("-p", "--suggestions"),
    CmdArgs.n_repeat: ("-r", "--repeat"),
    CmdArgs.n_jobs: ("-nj", "--num-jobs"),
    CmdArgs.jobs_file: ("-ofile", "--jobs-file"),
    CmdArgs.ravel: ("-rv", "--ravel"),
    CmdArgs.verbose: ("-v", "--verbose"),
    CmdArgs.timeout: ("-t", "--timeout"),
    CmdArgs.dry_run: (None, "dry_run"),  # Will not be specified from CLI
    CmdArgs.rev: (None, "rev"),  # Will not be specified from CLI
    CmdArgs.opt_rev: (None, "opt_rev"),  # Will not be specified from CLI. Which version of optimizer.
}


def arg_to_str(arg):
    """Return the bare name of an argument identifier, e.g. ``"db_root"`` for ``CmdArgs.db_root``.

    The result is used as the argparse `dest` for that argument.

    Parameters
    ----------
    arg : CmdArgs
        Enum member identifying the argument. Any other object whose `str` has the form ``"Prefix.name"`` is still
        accepted for backward compatibility.

    Returns
    -------
    dest : str
        The member name without the enum class prefix.
    """
    # Use the enum's own name rather than parsing str(arg): from Python 3.11 on, str() of an IntEnum member is the
    # integer value ("2"), not "CmdArgs.db_root", so splitting on "." no longer works.
    name = getattr(arg, "name", None)
    if name is not None:
        return name

    # Legacy path for non-enum inputs of the form "Prefix.name"
    _, dest = str(arg).split(".")
    return dest


def namespace_to_dict(args_ns):
    """Convert a parsed argparse namespace into a dict keyed by the `CMD_STR` keys.

    Only arguments whose `dest` name is present in the namespace are included; entries follow `CMD_STR` order.
    """
    ns_vars = vars(args_ns)
    args = {}
    for arg in CMD_STR:
        dest = arg_to_str(arg)
        if dest in ns_vars:
            args[arg] = ns_vars[dest]
    return args


def serializable_dict(args):
    """Re-key an argument dict by the long name strings from `CMD_STR` so it can be serialized.

    Asserts that every key of `args` was found in `CMD_STR`.
    """
    args_str = {}
    for arg, (_, long_name) in CMD_STR.items():
        if arg in args:
            args_str[long_name] = args[arg]
    assert len(args_str) == len(args)
    return args_str


def unserializable_dict(args_str):
    """Inverse of `serializable_dict`: re-key a dict of long name strings by the `CMD_STR` keys.

    Asserts that every key of `args_str` matched a long name in `CMD_STR`.
    """
    args = {}
    for arg, (_, long_name) in CMD_STR.items():
        if long_name in args_str:
            args[arg] = args_str[long_name]
    assert len(args_str) == len(args)
    return args


def add_argument(parser, arg, **kwargs):
    """Register `arg` on `parser` using its flags from `CMD_STR` and its name as `dest`.

    Any extra keyword arguments are forwarded to `parser.add_argument`.
    """
    # Each CMD_STR entry is a (short, long) pair, which becomes the two positional flag arguments
    flags = CMD_STR[arg]
    parser.add_argument(*flags, dest=arg_to_str(arg), **kwargs)


def filepath(value):
    """Argparse type that validates a file path and returns it unchanged.

    The path ``"."`` is accepted without validation as a work around for a `pathvalidate` bug.
    """
    if value != ".":
        validate_filepath(value, platform="auto")
    return value


def filename(value):
    """Argparse type that validates `value` with `validate_filename` (universal platform) and returns it unchanged."""
    validate_filename(value, platform="universal")
    return value


def uuid(val_str):
    """Argparse type that validates a UUID given as 32 hex characters and returns it lower-cased.

    Parameters
    ----------
    val_str : str
        The UUID as typed on the command line. Upper case hex digits are accepted.

    Returns
    -------
    val : str
        The lower-cased 32 character hex string.

    Raises
    ------
    ValueError
        If `pyuuid.UUID` cannot parse the string at all.
    argparse.ArgumentTypeError
        If the string parses but is not in plain 32 character hex form (e.g., it has hyphens, braces, or a urn
        prefix, which `pyuuid.UUID` strips).
    """
    val = str.lower(val_str)
    uuid_ = pyuuid.UUID(hex=val)
    # Raise a real exception instead of using `assert`: asserts vanish under `python -O`, and argparse only turns
    # ArgumentTypeError/TypeError/ValueError into a usage message. This also matches `positive_int` and `joinable`.
    if val != uuid_.hex:
        raise argparse.ArgumentTypeError("error in parsing uuid")
    return val


def positive_int(val_str):
    """Argparse type that parses an integer and requires it to be strictly positive.

    Raises `argparse.ArgumentTypeError` when the parsed value is zero or negative.
    """
    val = int(val_str)
    if val > 0:
        return val
    raise argparse.ArgumentTypeError("expected positive, got %s" % val_str)


def joinable(val_str):
    """Argparse type for names that may later be joined with `ARG_DELIM`.

    The value must pass `validate_filename` (we are at least as strict as file names) and must not contain the
    delimiter itself, otherwise `argparse.ArgumentTypeError` is raised.
    """
    val = str(val_str)  # just for good measure
    validate_filename(val, platform="universal")
    if ARG_DELIM not in val:
        return val
    raise argparse.ArgumentTypeError("delimiter %s not allowed in choice %s" % (ARG_DELIM, val))


def load_rev_number():
    """Return the first seven characters of the revision of the benchmark code, for logging in the meta-data.

    The revision is looked up in two places: the `VERSION` attribute of `bayesmark.version` (if that module can be
    imported) and the head commit of a git repo rooted one directory above this file (if there is one). When both
    are found they must agree.

    Raises
    ------
    RuntimeError
        If neither source of the revision is available.
    """
    # This function uses a lot of language "power features" that could be considered bad form (a conditional import,
    # and __file__ to locate the git repo at run time). It is tolerated because the result is only used for logging,
    # this module is inherently impure and IO heavy, and no cleaner way is known.

    # Source 1: version file, present when running from the pip-installable wheel without the git repo
    rev_file = None
    try:
        from bayesmark import version

        rev_file = version.VERSION
    except ImportError:
        pass
    else:
        rev_file = rev_file.strip()

    # Source 2: git API, available when inside the git repo (and not a wheel from pip install ...)
    rev_repo = None
    pkg_parent = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    try:
        repo = git.Repo(path=pkg_parent, search_parent_directories=False)
    except InvalidGitRepositoryError:
        pass
    else:
        rev_repo = repo.head.commit.hexsha.strip()

    # Check coherence of what we found
    if (rev_repo is None) and (rev_file is None):
        raise RuntimeError("Must specify version.py if not inside a git repo.")
    if (rev_repo is not None) and (rev_file is not None):
        assert rev_repo == rev_file, "Rev file %s does not match rev git %s" % (rev_file, rev_repo)

    # The git revision takes precedence when both are available
    rev = rev_file if rev_repo is None else rev_repo

    assert rev == rev.strip()
    # We could first enforce is_lower_hex if we want to enforce that
    return rev[:7]


def base_parser():
    """Build the parent parser holding the arguments shared by the other parsers in this module.

    It is created with ``add_help=False`` and adds the db root, optimizer root, and verbose arguments.
    """
    parser = argparse.ArgumentParser(add_help=False)

    # Both directory arguments default to the current directory
    dir_args = (
        (CmdArgs.db_root, "root directory for all benchmark experiments output"),
        (CmdArgs.optimizer_root, "Directory with optimization wrappers"),
    )
    for arg, help_text in dir_args:
        add_argument(parser, arg, default=".", type=filepath, help=help_text)

    # Always a verbose flag option
    add_argument(parser, CmdArgs.verbose, action="store_true", help="print the study logs to console")
    return parser


def launcher_parser(description):
    """Build the parser whose optimizer, data, classifier, and metric arguments each accept one or more values.

    On top of the `base_parser` arguments it adds the experiment identification, iteration counts, timeout, and
    the options for writing a dry run jobs file.
    """
    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])

    def add(arg, **kwargs):
        add_argument(parser, arg, **kwargs)

    add(CmdArgs.uuid, type=uuid, help="length 32 hex UUID for this experiment")
    add(CmdArgs.data_root, type=filepath, help="root directory for all custom csv files")
    add(CmdArgs.db, type=filename, help="database ID of this benchmark experiment")

    # Each of these takes one or more names
    multi_choice_args = (
        (CmdArgs.optimizer, "optimizers to use"),
        (CmdArgs.data, "data sets to use"),
        (CmdArgs.classifier, "classifiers to use"),
    )
    for arg, help_text in multi_choice_args:
        add(arg, type=joinable, nargs="+", help=help_text)
    add(CmdArgs.metric, type=str, choices=METRICS, nargs="+", help="scoring metric to use")

    # Iterations counts used in experiments
    add(CmdArgs.n_calls, default=100, type=positive_int, help="number of function evaluations")
    add(CmdArgs.n_suggest, default=1, type=positive_int, help="number of suggestions to provide in parallel")
    add(CmdArgs.n_repeat, default=20, type=positive_int, help="number of repetitions of each study")
    add(CmdArgs.timeout, default=0, type=int, help="Timeout per experiment (0 = no timeout)")

    # Arguments for creating dry run jobs file
    add(
        CmdArgs.n_jobs,
        type=int,
        default=0,
        help="number of jobs to put in the dry run file, the default 0 value disables dry run (real run)",
    )
    # Using default of current dir for jobs file output since that is generally the default for everything
    add(CmdArgs.jobs_file, type=filepath, default="./jobs.txt", help="a jobs file with all commands to be run")
    return parser


def experiment_parser(description):
    """Build the parser that requires exactly one optimizer, data set, classifier, and metric.

    The uuid and db arguments are also required; the data root is optional.
    """
    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])

    def add(arg, **kwargs):
        add_argument(parser, arg, **kwargs)

    add(CmdArgs.uuid, type=uuid, required=True, help="length 32 hex UUID for this experiment")

    # This could be made simpler and use '.' default for dataroot, even if no custom data used.
    add(CmdArgs.data_root, type=filepath, help="root directory for all custom csv files")
    add(CmdArgs.db, type=filename, required=True, help="database ID of this benchmark experiment")

    single_choice_args = (
        (CmdArgs.optimizer, "optimizer to use"),
        (CmdArgs.data, "data set to use"),
        (CmdArgs.classifier, "classifier to use"),
    )
    for arg, help_text in single_choice_args:
        add(arg, required=True, type=joinable, help=help_text)
    add(CmdArgs.metric, required=True, type=str, choices=METRICS, help="scoring metric to use")

    add(CmdArgs.n_calls, default=100, type=positive_int, help="number of function evaluations")
    add(CmdArgs.n_suggest, default=1, type=positive_int, help="number of suggestions to provide in parallel")
    return parser


def agg_parser(description):
    """Build the parser with the `base_parser` arguments plus a required db argument and the ravel flag."""
    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])

    db_options = dict(type=filename, required=True, help="database ID of this benchmark experiment")
    add_argument(parser, CmdArgs.db, **db_options)

    ravel_help = "ravel all studies to store batch suggestions as if they were serial (deprecated)"
    add_argument(parser, CmdArgs.ravel, action="store_true", help=ravel_help)
    return parser


def general_parser(description):
    """Build the parser with the `base_parser` arguments plus a required db argument."""
    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])

    db_options = dict(type=filename, required=True, help="database ID of this benchmark experiment")
    add_argument(parser, CmdArgs.db, **db_options)
    return parser


def parse_args(parser, argv=None):
    """Parse the command line with `parser` and return a dict of arguments with derived entries filled in.

    Besides the parsed values, the result gets the dry run flag, absolute versions of all path arguments, the
    revision of the benchmark code, and a placeholder for the optimizer revision.

    Note that this argument parser does not check compatibility between clf/reg metric and data set.
    """
    args = namespace_to_dict(parser.parse_args(argv))

    # A positive number of jobs requests a dry run
    dry_run = (CmdArgs.n_jobs in args) and (args[CmdArgs.n_jobs] > 0)
    args[CmdArgs.dry_run] = dry_run

    # Does not check dir actually exists here, but whatever
    if dry_run:
        args[CmdArgs.jobs_file] = abspath(args[CmdArgs.jobs_file], verify=False)
    else:
        args[CmdArgs.jobs_file] = None

    # Then make sure all path vars are abspath:
    # Dry run might be executing on diff system => cannot verify yet
    verify_now = not dry_run
    args[CmdArgs.db_root] = abspath(args[CmdArgs.db_root], verify=verify_now)
    args[CmdArgs.optimizer_root] = abspath(args[CmdArgs.optimizer_root], verify=True)
    if args.get(CmdArgs.data_root) is not None:
        args[CmdArgs.data_root] = abspath(args[CmdArgs.data_root], verify=verify_now)

    # Get git version of the benchmark itself for meta-data, just in case we need it.
    args[CmdArgs.rev] = load_rev_number()

    # We may support ability to specify version at args in the future, for now it is implied
    args[CmdArgs.opt_rev] = None
    return args


def _cleanup(filename_str):
    """Sanitize a string into a file name, then replace every `ARG_DELIM` in it with ``"-"``."""
    sanitized = sanitize_filename(filename_str, replacement_text="-", platform="universal")
    return sanitized.replace(ARG_DELIM, "-")


def infer_settings(opt_root, opt_pattern="**/optimizer.py"):
    """Build optimizer settings by searching `opt_root` for files matching `opt_pattern`.

    Each match contributes one entry: the key is the sanitized relative path of the directory containing the file,
    and the value is ``[relative_path_of_file, {}]``.

    NOTE(review): `PosixPath` cannot be instantiated on Windows (see the pathlib documentation), so this function
    presumably only works on POSIX systems; confirm whether that is intended.
    """
    root = PosixPath(opt_root)
    assert root.is_dir(), "Opt root directory doesn't exist: %s" % root
    assert root.is_absolute(), "Only absolute path should have even gotten this far."

    # Always sort for reproducibility
    source_files = [match.relative_to(root) for match in sorted(root.glob(opt_pattern))]

    settings = {}
    for rel_path in source_files:
        settings[_cleanup(str(rel_path.parent))] = [str(rel_path), {}]

    assert all(joinable(name) for name in settings), "Something went wrong in name sanitization."
    assert len(settings) == len(source_files), "Name collision after sanitization of %s" % repr(source_files)
    assert not (set(CONFIG.keys()) & set(settings.keys())), "Name collision with builtin optimizers."

    return settings


def load_optimizer_settings(opt_root):
    """Load the optimizer settings for `opt_root`.

    The settings are read as JSON from the file named `OPTIMIZERS_FILE` inside `opt_root`. If that file does not
    exist, they are inferred by searching the directory with `infer_settings` instead.
    """
    config_path = os.path.join(opt_root, OPTIMIZERS_FILE)
    try:
        with absopen(config_path, "r") as f:
            settings = json.load(f)
    except FileNotFoundError:
        # Search for optimizers instead
        settings = infer_settings(opt_root)

    assert isinstance(settings, dict)
    assert all(ARG_DELIM not in opt for opt in settings), "optimizer names violates name convention"
    return settings


def cmd_str():
    """Return the interpreter name followed by the shell-joined `sys.argv` of the current process."""
    argv_str = shell_join(sys.argv)
    return "%s %s" % (PY_INTERPRETER, argv_str)


================================================
FILE: bayesmark/constants.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""General constants that are used in multiple files in the code base.
"""

# Special constant for random search since it gets used as our reference point in the baselines
RANDOM_SEARCH = "RandomSearch"
OPTIMIZERS_FILE = "config.json"
ARG_DELIM = "_"  # Delimiter used when concatenating cmd arguments for any reason
PY_INTERPRETER = "python"  # What command to call for sub process, we could specify version number here also.

# Variables to save in SAL
EVAL = "eval"
TIME = "time"
SUGGEST_LOG = "suggest_log"
EXP_VARS = (EVAL, TIME, SUGGEST_LOG)

# Derived variables to save in SAL
TIME_RESULTS = "time"
EVAL_RESULTS = "eval"
BASELINE = "baseline"
PERF_RESULTS = "perf"
MEAN_SCORE = "summary"

# Coordinate dim names needed in saved xr Datasets
ITER = "iter"
TEST_CASE = "function"
METHOD = "optimizer"
TRIAL = "study_id"
SUGGEST = "suggestion"
OBJECTIVE = "objective"

# Dataset variables for eval results
VISIBLE_TO_OPT = "_visible_to_opt"

# Dataset variables for time results
SUGGEST_PHASE = "suggest"
OBS_PHASE = "observe"
EVAL_PHASE = "eval"
EVAL_PHASE_SUM = "eval_sum"
EVAL_PHASE_MAX = "eval_max"

# Dataset variables for aggregate results
PERF_MED = "median"
LB_MED = "median LB"
UB_MED = "median UB"
NORMED_MED = "median normed"
PERF_MEAN = "mean"
LB_MEAN = "mean LB"
UB_MEAN = "mean UB"
NORMED_MEAN = "mean normed"
LB_NORMED_MEAN = "mean normed LB"
UB_NORMED_MEAN = "mean normed UB"
PERF_BEST = "best"
PERF_CLIP = "clip"

# Choices used for test problems, there is some redundant specification with sklearn funcs file here
MODEL_NAMES = ("DT", "MLP-adam", "MLP-sgd", "RF", "SVM", "ada", "kNN", "lasso", "linear")
DATA_LOADER_NAMES = ("breast", "digits", "iris", "wine", "boston", "diabetes")

# Scorer names for classification and regression; METRICS is the sorted union of both
SCORERS_CLF = ("nll", "acc")
SCORERS_REG = ("mae", "mse")
METRICS = tuple(sorted(SCORERS_CLF + SCORERS_REG))


================================================
FILE: bayesmark/data.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to deal with all matters relating to loading example data sets, which we tune ML models to.
"""
from enum import IntEnum, auto

import numpy as np
import pandas as pd  # only needed for csv reader, maybe try something else
from sklearn import datasets

from bayesmark.constants import DATA_LOADER_NAMES, SCORERS_CLF, SCORERS_REG
from bayesmark.path_util import join_safe_r
from bayesmark.stats import robust_standardize


class ProblemType(IntEnum):
    """Kind of learning problem a data set poses: classification (`clf`) or regression (`reg`)."""

    clf = 1
    reg = 2


# Built-in data sets: name -> (loader function from sklearn.datasets, problem type).
# NOTE(review): `datasets.load_boston` is looked up when this module is imported; it has presumably been removed from
# recent scikit-learn releases, in which case this import fails. Confirm against the pinned scikit-learn version.
DATA_LOADERS = {
    "digits": (datasets.load_digits, ProblemType.clf),
    "iris": (datasets.load_iris, ProblemType.clf),
    "wine": (datasets.load_wine, ProblemType.clf),
    "breast": (datasets.load_breast_cancer, ProblemType.clf),
    "boston": (datasets.load_boston, ProblemType.reg),
    "diabetes": (datasets.load_diabetes, ProblemType.reg),
}

# The loaders here must stay in sync with the names declared in constants
assert sorted(DATA_LOADERS.keys()) == sorted(DATA_LOADER_NAMES)

# Arguably, this could go in constants, but doesn't cause extra imports being here.
METRICS_LOOKUP = {ProblemType.clf: SCORERS_CLF, ProblemType.reg: SCORERS_REG}


def get_problem_type(dataset_name):
    """Determine if this dataset is a regression or classification problem.

    Parameters
    ----------
    dataset_name : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file. Custom names must
        start with ``"reg-"`` or ``"clf-"``.

    Returns
    -------
    problem_type : ProblemType
        `Enum` to indicate if regression or classification data set.
    """
    if dataset_name in DATA_LOADERS:
        return DATA_LOADERS[dataset_name][1]

    # Maybe we can come up with a better system, but for now custom csv files declare their problem type through a
    # prefix on the file name.
    prefix_to_type = (("reg-", ProblemType.reg), ("clf-", ProblemType.clf))
    for prefix, problem_type in prefix_to_type:
        if dataset_name.startswith(prefix):
            return problem_type
    assert False, "Can't determine problem type from dataset name."


def _csv_loader(dataset_name, return_X_y, data_root, clip_x=100):  # pragma: io
    """Load custom csv files for use in the benchmark.

    This function assumes ``dataset_name + ".csv"`` is a csv file found in the `data_root` path.  It also assumes the
    last column of the csv file is the target and the other columns are features.

    The target column should be `int` for classification and `float` for regression. Column names ending in ``"_cat"``
    are assumed to be categorical and will be one-hot encoded.

    The features (and target for regression) are robust standardized. The features are also clipped to be in
    ``[-clip_x, clip_x]`` *after* standardization.

    Parameters
    ----------
    dataset_name : str
        Name of the csv file without the ``".csv"`` extension. The problem type is derived from this name.
    return_X_y : bool
        Must be true, only returning the arrays is supported right now.
    data_root : str
        Directory containing the csv file.
    clip_x : float
        Non-negative clipping bound applied to the standardized features.

    Returns
    -------
    data : :class:`numpy:numpy.ndarray`
        The standardized and clipped feature matrix.
    target : :class:`numpy:numpy.ndarray`
        The target vector, `int` for classification and standardized `float` for regression.
    problem_type : ProblemType
        `Enum` to indicate if regression or classification data set.
    """
    assert return_X_y, "Only returning (X,y) tuple supported right now."
    assert clip_x >= 0

    # Quantile range for robust standardization. The 86% range is the most efficient for Gaussians. See:
    # https://github.com/scikit-learn/scikit-learn/issues/10139#issuecomment-344705040
    q_level = 0.86

    path = join_safe_r(data_root, dataset_name + ".csv")

    # For now, use convention that can get problem type based on data set name
    problem_type = get_problem_type(dataset_name)

    # Assuming no missing data in source csv files at the moment, these will
    # result in error.
    df = pd.read_csv(
        path, header=0, index_col=False, engine="c", na_filter=False, true_values=["true"], false_values=["false"]
    )

    label = df.columns[-1]  # Assume last col is target

    # Note: np.float64 is spelled out below because the `np.float_` alias no longer exists in NumPy 2.0.
    target = df.pop(label).values
    if problem_type == ProblemType.clf:
        assert target.dtype in (np.bool_, np.int_)
        target = target.astype(np.int_)  # convert to int for skl
    if problem_type == ProblemType.reg:
        assert target.dtype == np.float64
        # 86% range is the most efficient (at least for Gaussians)
        target = robust_standardize(target, q_level=q_level)

    # Fill in any categorical variables (object dtype or col names ..._cat)
    cat_cols = sorted(cc for cc in df.columns if cc.endswith("_cat") or df[cc].dtype.kind == "O")
    df = pd.get_dummies(df, columns=cat_cols, drop_first=True, dtype=np.float64)
    # Could also sort all columns to be sure it will be reprod

    # Everything should now be in float
    assert (df.dtypes == np.float64).all()

    data = df.values
    data = robust_standardize(data, q_level=q_level)
    # Debatable if we should include this, but there are a lot of outliers
    data = np.clip(data, -clip_x, clip_x)

    # We should probably do some logging or something to wrap up
    return data, target, problem_type


def load_data(dataset_name, data_root=None):  # pragma: io
    """Load a data set and return it pre-processed into numpy arrays.

    Parameters
    ----------
    dataset_name : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
    data_root : str
        Root directory to look for all custom csv files. May be ``None`` for sklearn data sets.

    Returns
    -------
    data : :class:`numpy:numpy.ndarray` of shape (n, d)
        The feature matrix of the data set. It will be `float` array.
    target : :class:`numpy:numpy.ndarray` of shape (n,)
        The target vector for the problem, which is `int` for classification and `float` for regression.
    problem_type : :class:`bayesmark.data.ProblemType`
        `Enum` to indicate if regression or classification data set.
    """
    if dataset_name not in DATA_LOADERS:
        # Anything that is not built in is treated as a custom csv file
        assert data_root is not None, "data root cannot be None when custom csv requested."
        data, target, problem_type = _csv_loader(dataset_name, return_X_y=True, data_root=data_root)
        return data, target, problem_type

    loader_f, problem_type = DATA_LOADERS[dataset_name]
    data, target = loader_f(return_X_y=True)
    return data, target, problem_type


================================================
FILE: bayesmark/expected_max.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute expected maximum or minimum from iid samples.
"""
import numpy as np
from scipy.special import gammaln, logsumexp


def get_expected_max_weights(n, m):
    """Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set.

    Parameters
    ----------
    n : int
        Number of data points in data set ``len(x)``. Must be ``>= 1``.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        Number of iid draws the expected maximum is taken over. Require ``m >= 1``. This can be broadcasted. If
        ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case.

    Returns
    -------
    pdf : :class:`numpy:numpy.ndarray`, shape (n,)
        The weights for L-estimator. Will be positive and sum to one.
    """
    assert np.ndim(n) == 0
    assert n >= 1  # otherwise makes no sense

    m = np.asarray(m)  # Must be np type for broadcasting
    # We could also check dtype is int, but not bothering here
    assert np.all(m >= 1)  # otherwise makes no sense
    m_col = m[..., None]  # trailing axis broadcasts against the n ranks

    ranks = np.arange(n) + 1
    # Unnormalized log weight of each order statistic
    log_w = gammaln(ranks) - gammaln(ranks - (m_col - 1))
    # Normalize in log space
    log_norm = logsumexp(log_w, axis=-1, keepdims=True)
    pdf = np.exp(log_w - log_norm)

    # expect nan for m > n
    total = np.sum(pdf, axis=-1, keepdims=True)
    assert np.all(np.isclose(total, 1.0) | (m_col > n))
    return pdf


def expected_max(x, m):
    """Compute unbiased estimator of expected ``max(x[1:m])`` on a data set.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n,)
        Data set we would like expected ``max(x[1:m])`` on.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        Number of iid draws the expected maximum is taken over. Require ``m >= 1``. This can be broadcasted. If
        ``m > n``, the result will be nan, because there is no way to get unbiased estimate in that case.

    Returns
    -------
    E_max_x : float
        Unbiased estimate of mean max of `m` draws from distribution on `x`. For empty `x` this is nan with the
        shape of `m`.
    """
    assert np.ndim(x) == 1
    # m is validated by get_expected_max_weights

    # Order statistics for the L-estimator, sorted into a fresh array so the input is left untouched
    order_stats = np.sort(np.asarray(x))

    (n,) = order_stats.shape
    if n == 0:
        return np.full(np.shape(m), np.nan)

    # L-estimator: weighted sum of the order statistics
    weights = get_expected_max_weights(n, m)
    return np.sum(order_stats * weights, axis=-1)


def expected_min(x, m):
    """Unbiased L-estimator of the expected minimum of `m` iid draws, computed from a sample `x`.

    This is obtained from :func:`expected_max` via the identity ``min(x) = -max(-x)``.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n,)
        Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        Number of iid draws the minimum is taken over. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the
        weights will be nan, because there is no way to get unbiased estimate in that case.

    Returns
    -------
    E_min_x : float
        Unbiased estimate of mean min of `m` draws from distribution on `x`.
    """
    negated = -np.asarray(x)
    return -expected_max(negated, m)


================================================
FILE: bayesmark/experiment.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Perform a study.
"""
import json
import logging
import random as pyrandom
import uuid
import warnings
from collections import OrderedDict
from time import sleep, time

import numpy as np
import xarray as xr

import bayesmark.cmd_parse as cmd
import bayesmark.constants as cc
import bayesmark.random_search as rs
from bayesmark.builtin_opt.config import CONFIG
from bayesmark.cmd_parse import CmdArgs
from bayesmark.constants import ARG_DELIM, ITER, OBJECTIVE, SUGGEST
from bayesmark.data import METRICS_LOOKUP, get_problem_type
from bayesmark.np_util import argmin_2d, linear_rescale, random_seed
from bayesmark.serialize import XRSerializer
from bayesmark.signatures import analyze_signature_pair, get_func_signature
from bayesmark.sklearn_funcs import SklearnModel, SklearnSurrogate
from bayesmark.space import JointSpace
from bayesmark.util import chomp, str_join_safe

# Module-wide logger. `experiment_main` sets its level and attaches the file (and optionally stream) handlers.
logger = logging.getLogger(__name__)

# For now treat the objective names as global const. However, in the future these could vary by type of problem.
OBJECTIVE_NAMES = SklearnModel.objective_names


def _build_test_problem(model_name, dataset, scorer, path):
    """Factory for the test problem object whose objective is to be minimized.

    A `model_name` ending in ``"-surr"`` selects the surrogate version of the problem; anything else selects the real
    `sklearn` model.

    Parameters
    ----------
    model_name : str
        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned to, which must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    path : str or None
        Absolute path to folder containing custom data sets/pickle files with surrogate model.

    Returns
    -------
    prob : :class:`.sklearn_funcs.TestFunction`
        The test function to evaluate in experiments.
    """
    is_surrogate = model_name.endswith("-surr")
    if not is_surrogate:
        return SklearnModel(model_name, dataset, scorer, data_root=path)

    # Requires IO to test these, so will add the pragma here. Maybe that points towards a possible design change.
    base_name = chomp(model_name, "-surr")  # pragma: io
    return SklearnSurrogate(base_name, dataset, scorer, path=path)  # pragma: io


def run_study(optimizer, test_problem, n_calls, n_suggestions, n_obj=1, callback=None):
    """Run the suggest/evaluate/observe loop for one optimizer on one test problem.

    This function can be used for benchmarking on general stateless objectives (not just `sklearn`).

    Failures inside ``optimizer.suggest`` fall back to random search, failures inside ``test_problem.evaluate`` are
    recorded as ``inf``, and failures inside ``optimizer.observe`` are logged and ignored. A suggestion that fails
    validation against the problem's search space raises `ValueError`.

    Parameters
    ----------
    optimizer : :class:`.abstract_optimizer.AbstractOptimizer`
        Instance of one of the wrapper optimizers.
    test_problem : :class:`.sklearn_funcs.TestFunction`
        Instance of test function to attempt to minimize.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluation we run each iteration. Must be ``>= 1``.
    n_obj : int
        Number of different objectives measured, only objective 0 is seen by optimizer. Must be ``>= 1``.
    callback : callable
        Optional callback taking the current best function evaluation, and the number of iterations finished. Takes
        array of shape `(n_obj,)`. It is called once with all ``inf`` before the first iteration.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,
        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each
        evaluation of the objective function, and the time to make an observe call.
    suggest_log : list(list(dict(str, object)))
        Log of the suggestions corresponding to the `function_evals`.
    """
    assert n_suggestions >= 1, "batch size must be at least 1"
    assert n_obj >= 1, "Must be at least one objective"

    validator = JointSpace(test_problem.get_api_config())
    has_callback = callback is not None

    if has_callback:
        # First do initial log at inf score, in case we don't even get to first eval before crash/job timeout
        callback(np.full((n_obj,), np.inf, dtype=float), 0)

    # Pre-allocate all of the result buffers
    function_evals = np.zeros((n_calls, n_suggestions, n_obj))
    eval_time = np.zeros((n_calls, n_suggestions))
    suggest_time = np.zeros(n_calls)
    observe_time = np.zeros(n_calls)
    suggest_log = [None] * n_calls

    for ii in range(n_calls):
        # --- Suggest phase ---
        t_start = time()
        try:
            next_points = optimizer.suggest(n_suggestions)
        except Exception as e:
            logger.warning("Failure in optimizer suggest. Falling back to random search.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_suggest_exception": {ITER: ii}}))
            fallback_config = test_problem.get_api_config()
            next_points = rs.suggest_dict([], [], fallback_config, n_suggestions=n_suggestions)
        suggest_time[ii] = time() - t_start

        logger.info("suggestion time taken %f iter %d next_points %s" % (suggest_time[ii], ii, str(next_points)))
        assert len(next_points) == n_suggestions, "invalid number of suggestions provided by the optimizer"

        # We could put this inside the TestProblem class, but ok here for now.
        try:
            validator.validate(next_points)  # Fails if suggestions outside allowed range
        except Exception:
            raise ValueError("Optimizer suggestion is out of range.")

        # --- Evaluation phase: each suggestion in the batch is evaluated and timed on its own ---
        for jj, point in enumerate(next_points):
            t_start = time()
            try:
                objectives = test_problem.evaluate(point)
            except Exception as e:
                logger.warning("Failure in function eval. Setting to inf.")
                logger.exception(e, exc_info=True)
                objectives = np.full((n_obj,), np.inf, dtype=float)
            eval_time[ii, jj] = time() - t_start
            assert np.shape(objectives) == (n_obj,)

            suggest_log[ii] = next_points
            function_evals[ii, jj, :] = objectives
            logger.info(
                "function_evaluation time %f value %f suggestion %s"
                % (eval_time[ii, jj], objectives[0], str(point))
            )

        # Note: this could be inf in the event of a crash in f evaluation, the optimizer must be able to handle that.
        # Only objective 0 is seen by optimizer.
        eval_list = function_evals[ii, :, 0].tolist()

        if has_callback:
            # Report the evaluation that is best on objective 0 among everything seen so far
            best_ii, best_jj = argmin_2d(function_evals[: ii + 1, :, 0])
            callback(function_evals[best_ii, best_jj, :], ii + 1)

        # --- Observe phase ---
        t_start = time()
        try:
            optimizer.observe(next_points, eval_list)
        except Exception as e:
            logger.warning("Failure in optimizer observe. Ignoring these observations.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_observe_exception": {ITER: ii}}))
        observe_time[ii] = time() - t_start

        logger.info(
            "observation time %f, current best %f at iter %d"
            % (observe_time[ii], np.min(function_evals[: ii + 1, :, 0]), ii)
        )

    timing = (suggest_time, eval_time, observe_time)
    return function_evals, timing, suggest_log


def run_sklearn_study(
    opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions, data_root=None, callback=None
):
    """Build an `sklearn` test problem and an optimizer for it, then run the study via :func:`.run_study`.

    This routine is meant for benchmarking when tuning `sklearn` models, as opposed to the more general
    :func:`.run_study`.

    Parameters
    ----------
    opt_class : :class:`.abstract_optimizer.AbstractOptimizer`
        Type of wrapper optimizer must be subclass of :class:`.abstract_optimizer.AbstractOptimizer`.
    opt_kwargs : kwargs
        `kwargs` to use when instantiating the wrapper class.
    model_name : str
        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned to, which must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluation we run each iteration. Must be ``>= 1``.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.
    callback : callable
        Optional callback taking the current best function evaluation, and the number of iterations finished. Takes
        array of shape `(n_obj,)`.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,
        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each
        evaluation of the objective function, and the time to make an observe call.
    suggest_log : list(list(dict(str, object)))
        Log of the suggestions corresponding to the `function_evals`.
    """
    # The objective to minimize
    problem = _build_test_problem(model_name, dataset, scorer, data_root)

    # The optimizer is configured from the search space the problem exposes
    search_space = problem.get_api_config()
    opt = opt_class(search_space, **opt_kwargs)

    # Sanity check the objectives: the first one must be the one the optimizer gets to see
    assert problem.objective_names == OBJECTIVE_NAMES
    assert OBJECTIVE_NAMES[0] == cc.VISIBLE_TO_OPT

    # Now actually do the experiment
    results = run_study(opt, problem, n_calls, n_suggestions, n_obj=len(OBJECTIVE_NAMES), callback=callback)
    function_evals, timing, suggest_log = results
    return function_evals, timing, suggest_log


def get_objective_signature(model_name, dataset, scorer, data_root=None):
    """Compute the signature of the objective function defined by an sklearn model, data set, and metric.

    This routine specializes :func:`.signatures.get_func_signature` for the `sklearn` study case.

    Parameters
    ----------
    model_name : str
        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned to, which must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.

    Returns
    -------
    signature : list(str)
        The signature of this test function.
    """
    problem = _build_test_problem(model_name, dataset, scorer, data_root)
    search_space = problem.get_api_config()
    return get_func_signature(problem.evaluate, search_space)


def build_eval_ds(function_evals, objective_names):
    """Package the raw array of function evaluations as a :class:`xarray:xarray.Dataset`, one variable per objective.

    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.

    Parameters
    ----------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)
        Value of objective for each evaluation.
    objective_names : list(str) of shape (n_obj,)
        The names of each objective. These must be unique.

    Returns
    -------
    eval_ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` containing one variable for each objective with the objective function
        evaluations. It has dimensions ``(ITER, SUGGEST)``.
    """
    n_call, n_suggest, n_obj = np.shape(function_evals)
    assert len(objective_names) == n_obj
    assert len(set(objective_names)) == n_obj, "Objective names must be unique"

    # First label all three axes, then split the objective axis out into separate variables
    labeled = xr.DataArray(
        data=function_evals,
        coords={ITER: range(n_call), SUGGEST: range(n_suggest), OBJECTIVE: list(objective_names)},
        dims=(ITER, SUGGEST, OBJECTIVE),
    )
    return labeled.to_dataset(dim=OBJECTIVE)


def build_timing_ds(suggest_time, eval_time, observe_time):
    """Package the raw timing arrays from a study as a :class:`xarray:xarray.Dataset`.

    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.

    Parameters
    ----------
    suggest_time : :class:`numpy:numpy.ndarray` of shape (n_calls,)
        The time to make each (batch) suggestion.
    eval_time : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions)
        The time for each evaluation of the objective function.
    observe_time : :class:`numpy:numpy.ndarray` of shape (n_calls,)
        The time to make each observe call.

    Returns
    -------
    time_ds : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``,
        ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. The variable `EVAL_PHASE` has the function evaluation time
        for each parallel suggestion.
    """
    n_call, n_suggest = np.shape(eval_time)
    # The per-iteration timings must line up with the first axis of the per-evaluation timings
    assert np.shape(suggest_time) == (n_call,)
    assert np.shape(observe_time) == (n_call,)

    variables = OrderedDict(
        [
            (cc.SUGGEST_PHASE, ((ITER,), suggest_time)),
            (cc.EVAL_PHASE, ((ITER, SUGGEST), eval_time)),
            (cc.OBS_PHASE, ((ITER,), observe_time)),
        ]
    )
    axes = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))])

    return xr.Dataset(variables, coords=axes)


def build_suggest_ds(suggest_log):
    """Convert the log of suggestions (function evaluation inputs) to :class:`xarray:xarray.Dataset`.

    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.

    Parameters
    ----------
    suggest_log : list(list(dict(str, object)))
        Log of the suggestions. It has shape `(n_call, n_suggest)`. Every suggestion must contain the keys found in
        the first suggestion, and the values must be scalars.

    Returns
    -------
    suggest_ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` containing one variable for each input with the objective function evaluations.
        It has dimensions ``(ITER, SUGGEST)``.
    """
    n_call, n_suggest = np.shape(suggest_log)
    assert n_call * n_suggest > 0

    # Setup the dims
    ds_vars = sorted(suggest_log[0][0].keys())
    coords = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))])

    # Build each variable from *all* of its values so numpy infers a dtype that can hold every one of them. Inferring
    # the dtype from the first suggestion only (e.g., via np.full) silently truncates later values: a longer string is
    # cut to the width of the first string, and a float is cut to an int if the first value happens to be an int.
    data = OrderedDict()
    for kk in ds_vars:
        values = np.array([[point[kk] for point in batch] for batch in suggest_log])
        assert values.shape == (n_call, n_suggest), "suggestion values must be scalars"
        data[kk] = ((ITER, SUGGEST), values)

    suggest_ds = xr.Dataset(data, coords=coords)
    return suggest_ds


def load_optimizer_kwargs(optimizer_name, opt_root):  # pragma: io
    """Load the kwarg options for this optimizer being tested.

    This is part of the general experiment setup before a study. Built-in optimizers are looked up in `CONFIG` first;
    the settings under `opt_root` are only loaded for optimizers that are not built in.

    Parameters
    ----------
    optimizer_name : str
        Name of the optimizer being tested. This optimizer name must be present in optimizer config file.
    opt_root : str
        Absolute path to folder containing the config file.

    Returns
    -------
    kwargs : dict(str, object)
        The kwargs setting to pass into the optimizer wrapper constructor.
    """
    if optimizer_name in CONFIG:
        _, kwargs = CONFIG[optimizer_name]
    else:
        settings = cmd.load_optimizer_settings(opt_root)
        # The message has two placeholders, so it must be given two values. With only one the string formatting itself
        # raises a TypeError and masks the intended assertion message.
        assert optimizer_name in settings, "optimizer %s not found in settings file %s" % (optimizer_name, opt_root)
        _, kwargs = settings[optimizer_name]
    return kwargs


def _setup_seeds(hex_str):  # pragma: main
    """Seed the global `random` and `numpy` random streams from a hex string.

    This function should only be called from main. Be careful with this function as it manipulates the global random
    streams.

    This is part of the general experiment setup before a study.

    If torch becomes used in any of our optimizers then this will need to come back, could also do TF seed init.
    ```
    torch.manual_seed(random_seed(master_stream))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(random_seed(master_stream))
    ```
    """
    # Could use UUID class, but more direct to just convert the hex to py int.
    master_seed = int(hex_str, 16)
    # pyrandom is better for master because it is not limited to 32-bit seeds.
    master_stream = pyrandom.Random(master_seed)

    # Set all random seeds: avoid correlated streams ==> must use diff seeds, each drawn from the master stream.
    py_seed = random_seed(master_stream)
    pyrandom.seed(py_seed)
    np_seed = random_seed(master_stream)
    np.random.seed(np_seed)


def experiment_main(opt_class, args=None):  # pragma: main
    """This is in effect the `main` routine for this experiment. However, it is called from the optimizer wrapper file
    so the class can be passed in. The optimizers are assumed to be outside the package, so the optimizer class can't
    be named from inside the main function without using hacky stuff like `eval`.

    The routine sets up logging, computes the signature of the objective function under a fixed seed, optionally sets
    up intermediate score logging against the baselines, runs the study under a seed derived from the run UUID, and
    finally saves the evaluations, timings, and suggestion log.

    Parameters
    ----------
    opt_class : :class:`.abstract_optimizer.AbstractOptimizer`
        Type of wrapper optimizer (the class, not an instance). Its ``get_version()`` is recorded in `args`.
    args : dict or None
        Parsed arguments indexed by the `CmdArgs` entries. If ``None``, the arguments are parsed from the command line.
        Note that `args` is modified in place to record the optimizer version.
    """
    if args is None:
        description = "Run a study with one benchmark function and an optimizer"
        args = cmd.parse_args(cmd.experiment_parser(description))
    # Record the version of the optimizer wrapper with the rest of the arguments
    args[CmdArgs.opt_rev] = opt_class.get_version()

    run_uuid = uuid.UUID(args[CmdArgs.uuid])

    # Route messages from the warnings module through logging (the "py.warnings" logger set up below)
    logging.captureWarnings(True)

    # Setup logging to both a file and stdout (if verbose is set to True)
    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    logfile = XRSerializer.logging_path(args[CmdArgs.db_root], args[CmdArgs.db], run_uuid)
    logger_file_handler = logging.FileHandler(logfile, mode="w")
    logger.addHandler(logger_file_handler)
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # Captured warnings go to the same log file (and to the stream as well if verbose)
    warnings_logger = logging.getLogger("py.warnings")
    warnings_logger.addHandler(logger_file_handler)
    if args[CmdArgs.verbose]:
        warnings_logger.addHandler(logging.StreamHandler())

    logger.info("running: %s" % str(cmd.serializable_dict(args)))
    logger.info("cmd: %s" % cmd.cmd_str())

    assert (
        args[CmdArgs.metric] in METRICS_LOOKUP[get_problem_type(args[CmdArgs.data])]
    ), "reg/clf metrics can only be used on compatible dataset"

    # Setup random streams for computing the signature, must use same seed
    # across all runs to ensure signature is consistent. This seed is random:
    _setup_seeds("7e9f2cabb0dd4f44bc10cf18e440b427")  # pragma: allowlist secret
    signature = get_objective_signature(
        args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric], data_root=args[CmdArgs.data_root]
    )
    logger.info("computed signature: %s" % str(signature))

    opt_kwargs = load_optimizer_kwargs(args[CmdArgs.optimizer], args[CmdArgs.optimizer_root])

    # Setup the call back for intermediate logging
    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]):
        warnings.warn("Baselines not found. Will not log intermediate scores.")
        callback = None
    else:
        test_case_str = SklearnModel.test_case_str(args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric])
        optimizer_str = str_join_safe(ARG_DELIM, (args[CmdArgs.optimizer], args[CmdArgs.opt_rev], args[CmdArgs.rev]))

        baseline_ds, baselines_meta = XRSerializer.load_derived(
            args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE
        )

        # Check the objective function signatures match in the baseline file
        sig_errs, _ = analyze_signature_pair({test_case_str: signature[1]}, baselines_meta["signature"])
        logger.info("Signature errors:\n%s" % sig_errs.to_string())
        print(json.dumps({"exp sig errors": sig_errs.T.to_dict()}))

        def log_mean_score_json(evals, iters):
            """Print the rescaled score of the current best evaluation as a single JSON line on stdout.

            `evals` has one entry per objective in `OBJECTIVE_NAMES`, and `iters` is the number of iterations
            finished so far.
            """
            assert evals.shape == (len(OBJECTIVE_NAMES),)
            assert not np.any(np.isnan(evals))

            log_msg = {
                cc.TEST_CASE: test_case_str,
                cc.METHOD: optimizer_str,
                cc.TRIAL: args[CmdArgs.uuid],
                cc.ITER: iters,
            }

            for idx, obj in enumerate(OBJECTIVE_NAMES):
                assert OBJECTIVE_NAMES[idx] == obj

                # Extract relevant rescaling info
                slice_ = {cc.TEST_CASE: test_case_str, OBJECTIVE: obj}
                best_opt = baseline_ds[cc.PERF_BEST].sel(slice_, drop=True).values.item()
                base_clip_val = baseline_ds[cc.PERF_CLIP].sel(slice_, drop=True).values.item()

                # Perform the same rescaling as found in experiment_analysis.compute_aggregates()
                score = linear_rescale(evals[idx], best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
                # Also, clip the score from below at -1 to limit max influence of single run on final average
                score = np.clip(score, -1.0, 1.0)
                score = score.item()  # Make easiest for logging in JSON
                assert isinstance(score, float)

                # Note: This is not the raw score but the rescaled one!
                log_msg[obj] = score
            log_msg = json.dumps(log_msg)
            print(log_msg, flush=True)
            # One second safety delay to protect against subprocess stdout getting lost
            sleep(1)

        callback = log_mean_score_json

    # Now set the seeds for the actual experiment
    _setup_seeds(args[CmdArgs.uuid])

    # Now do the experiment
    logger.info(
        "starting sklearn study %s %s %s %s %d %d"
        % (
            args[CmdArgs.optimizer],
            args[CmdArgs.classifier],
            args[CmdArgs.data],
            args[CmdArgs.metric],
            args[CmdArgs.n_calls],
            args[CmdArgs.n_suggest],
        )
    )
    logger.info("with data root: %s" % args[CmdArgs.data_root])
    function_evals, timing, suggest_log = run_sklearn_study(
        opt_class,
        opt_kwargs,
        args[CmdArgs.classifier],
        args[CmdArgs.data],
        args[CmdArgs.metric],
        args[CmdArgs.n_calls],
        args[CmdArgs.n_suggest],
        data_root=args[CmdArgs.data_root],
        callback=callback,
    )

    # Curate results into clean dataframes
    eval_ds = build_eval_ds(function_evals, OBJECTIVE_NAMES)
    time_ds = build_timing_ds(*timing)
    suggest_ds = build_suggest_ds(suggest_log)

    # setup meta:
    meta = {"args": cmd.serializable_dict(args), "signature": signature}
    logger.info("saving meta data: %s" % str(meta))

    # Now the final IO to export the results
    logger.info("saving results")
    XRSerializer.save(eval_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL, uuid_=run_uuid)

    logger.info("saving timing")
    XRSerializer.save(time_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.TIME, uuid_=run_uuid)

    logger.info("saving suggest log")
    XRSerializer.save(suggest_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.SUGGEST_LOG, uuid_=run_uuid)

    logger.info("done")


def _get_opt_class(opt_name):
    """Load the relevant wrapper class based on this optimizer name.

    This is inherently a bit ugly, but it is only called at the main() level before the inner workings get going.
    There are a few ways to do this, each with some pros and cons:
    1) The way done here: based on the filename, load that module via conditional imports and if-else. cons:
        - uses conditional imports
        - must manually repeat yourself in the if-else, but these are checked in unit testing
    2) Import everything and then pick the right optimizer based on a dict of name_str -> class. cons:
        - loads every dependency no matter which is used so could be slow
        - also a stupid dependency might change global state in a way that corrupts experiments
    3) Use the wrapper file as the entry point and add that to setup.py. cons:
        - Will clutter the CLI namespace with one command for each wrapper
    4) Use importlib to import the specified file. cons:
        - Makes assumptions about relative path structure. For pip-installed packages, probably safer to let python
        find the file via import.
    This option (1) seems least objectionable. However, this function could easily be switched to use importlib without
    any changes elsewhere.

    Parameters
    ----------
    opt_name : str
        Name of a built-in optimizer. It must be a key in `CONFIG`.

    Returns
    -------
    opt_class : type
        The `opt_wrapper` attribute of the wrapper module for this optimizer.
    """
    # CONFIG maps the optimizer name to a pair whose first element is the wrapper file name
    wrapper_file, _ = CONFIG[opt_name]

    if wrapper_file == "hyperopt_optimizer.py":
        import bayesmark.builtin_opt.hyperopt_optimizer as opt
    elif wrapper_file == "nevergrad_optimizer.py":
        import bayesmark.builtin_opt.nevergrad_optimizer as opt
    elif wrapper_file == "opentuner_optimizer.py":
        import bayesmark.builtin_opt.opentuner_optimizer as opt
    elif wrapper_file == "pysot_optimizer.py":
        import bayesmark.builtin_opt.pysot_optimizer as opt
    elif wrapper_file == "random_optimizer.py":
        import bayesmark.builtin_opt.random_optimizer as opt
    elif wrapper_file == "scikit_optimizer.py":
        import bayesmark.builtin_opt.scikit_optimizer as opt
    else:
        assert False, "CONFIG for built in optimizers has added a new optimizer, but not updated this function."

    opt_class = opt.opt_wrapper
    return opt_class


def main():  # pragma: main
    """Entry point for running one experiment with a built-in optimizer. Usually called by the experiment launcher.

    The command line arguments are parsed here so the optimizer class can be looked up by name before handing off to
    :func:`experiment_main`.
    """
    parser = cmd.experiment_parser("Run a study with one benchmark function and an optimizer")
    args = cmd.parse_args(parser)

    experiment_main(_get_opt_class(args[CmdArgs.optimizer]), args=args)


# Run an experiment when this module is executed as a script.
if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/experiment_aggregate.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Aggregate the results of many studies to prepare analysis.
"""
import json
import logging
from collections import Counter

import numpy as np
import xarray as xr

import bayesmark.constants as cc
import bayesmark.xr_util as xru
from bayesmark.cmd_parse import CmdArgs, agg_parser, parse_args, serializable_dict, unserializable_dict
from bayesmark.constants import ARG_DELIM, EVAL_RESULTS, ITER, METHOD, SUGGEST, TEST_CASE, TIME_RESULTS, TRIAL
from bayesmark.serialize import XRSerializer
from bayesmark.signatures import analyze_signatures
from bayesmark.sklearn_funcs import SklearnModel
from bayesmark.util import str_join_safe

# Module-wide logger
logger = logging.getLogger(__name__)


def validate_time(all_time):
    """Validate the aggregated time data set.

    Checks that `all_time` is a :class:`xarray:xarray.Dataset`, that each timing phase has the expected dimensions,
    and that the coordinates are simple with at least one element per side.
    """
    assert isinstance(all_time, xr.Dataset)

    # Suggest and observe are timed once per iteration, evaluation is timed once per suggestion
    expected_dims = (
        (cc.SUGGEST_PHASE, (ITER,)),
        (cc.EVAL_PHASE, (ITER, SUGGEST)),
        (cc.OBS_PHASE, (ITER,)),
    )
    for phase, dims in expected_dims:
        assert all_time[phase].dims == dims

    assert xru.is_simple_coords(all_time.coords, min_side=1)


def validate_perf(perf_da):
    """Validate the input eval data arrays.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Performance data from a single experiment. It must have dimensions ``(ITER, SUGGEST)``, simple coordinates,
        and contain no nan values.
    """
    # This must be a DataArray and not a Dataset: the checks below rely on `dims` being a tuple and on `values` being
    # an array, which only holds for a DataArray.
    assert isinstance(perf_da, xr.DataArray)
    assert perf_da.dims == (ITER, SUGGEST)
    assert xru.is_simple_coords(perf_da.coords)
    assert not np.any(np.isnan(perf_da.values))


def validate_agg_perf(perf_da, min_trial=1):
    """Validate the aggregated eval data set.

    Checks that `perf_da` is a :class:`xarray:xarray.DataArray` with dimensions
    ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``, simple coordinates on ``(ITER, SUGGEST, TRIAL)``, no nan values,
    and at least `min_trial` trials.
    """
    expected_dims = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    simple_dims = (ITER, SUGGEST, TRIAL)

    assert isinstance(perf_da, xr.DataArray)
    assert perf_da.dims == expected_dims
    assert xru.is_simple_coords(perf_da.coords, dims=simple_dims)

    has_nan = np.any(np.isnan(perf_da.values))
    assert not has_nan

    n_trial = perf_da.sizes[TRIAL]
    assert n_trial >= min_trial


def summarize_time(all_time):
    """Reduce the timing dataset of a single experiment so that every variable is indexed by iteration only.

    Parameters
    ----------
    all_time : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``,
        ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. The variable `EVAL_PHASE` has the function evaluation time
        for each parallel suggestion.

    Returns
    -------
    time_summary : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(SUGGEST_PHASE, OBS_PHASE, EVAL_PHASE_MAX, EVAL_PHASE_SUM)`` which all have dimensions
        ``(ITER,)``. The maximum `EVAL_PHASE_MAX` is relevant for wall clock time, while `EVAL_PHASE_SUM` is relevant
        for CPU time.
    """
    validate_time(all_time)

    summary = xr.Dataset(coords=all_time.coords)

    # The per-iteration phases carry over unchanged
    for phase in (cc.SUGGEST_PHASE, cc.OBS_PHASE):
        summary[phase] = all_time[phase]

    # The evaluation phase is collapsed over the parallel suggestions in two different ways
    eval_phase = all_time[cc.EVAL_PHASE]
    summary[cc.EVAL_PHASE_MAX] = eval_phase.max(dim=SUGGEST)
    summary[cc.EVAL_PHASE_SUM] = eval_phase.sum(dim=SUGGEST)
    return summary


def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Datasets.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments` is
        a pair containing ``(meta_data, data)`` where ``meta_data = (test_case, optimizer, uuid)`` and
        ``data = (perf_ds, time_ds, suggest_ds, sig)``. See `load_experiments` for details on these variables.
    ravel : bool
        Deprecated. If true, a `NotImplementedError` is raised when the first experiment is processed. Reshape in the
        analysis steps instead.

    Returns
    -------
    all_perf : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `perf_ds` from the experiments. The meta-data from the experiments are included
        as extra dimensions. Each variable has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the
        `uuid` to a trial, there must be an equal number of repetition in the experiments for each `TEST_CASE`,
        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `time_ds` from the experiments after passing each through `summarize_time`.
        Each variable has dimensions ``(ITER, TEST_CASE, METHOD, TRIAL)``.
    all_suggest : dict(str, :class:`xarray:xarray.Dataset`)
        Dictionary mapping each test case to a Dataset containing all of the `suggest_ds` from the experiments on that
        test case. Each variable has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` where `TEST_CASE` is
        singleton.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures, keyed by test case.

    Raises
    ------
    NotImplementedError
        If `ravel` is true and `all_experiments` is not empty.
    ValueError
        If `all_experiments` is empty, since the trial count check takes the ``min`` of an empty collection.
    AssertionError
        If any of the consistency checks on the inputs or the combined results fail.
    """
    all_perf = {}
    all_time = {}
    all_suggest = {}
    all_sigs = {}
    # Number of experiments seen so far for each (test_case, optimizer) pair, used to assign trial indices
    trial_counter = Counter()
    # Note: the uuid in the meta-data is unpacked but not used, trials are numbered by order of arrival instead
    for (test_case, optimizer, uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:
        if ravel:
            raise NotImplementedError("ravel is deprecated. Just reshape in analysis steps instead.")

        # The key elements line up with the dims=(TEST_CASE, METHOD, TRIAL) passed to ds_concat below
        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)
        all_perf[case_key] = perf_ds

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Process suggestion data, kept in a separate dict per test case
        all_suggest_curr = all_suggest.setdefault(test_case, {})
        all_suggest_curr[case_key] = suggest_ds

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    # Every (test_case, optimizer) pair must have been repeated the same number of times
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)
    # NOTE(review): presumably ds_concat leaves nan where a key combination is absent -- confirm in xr_util
    assert not any(
        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf
    ), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    # Replace each per test case dict of datasets with its concatenated dataset
    for test_case in all_suggest:
        all_suggest[test_case] = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))
        assert all(
            all_suggest[test_case][kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
            for kk in all_suggest[test_case]
        )
        assert not any(np.any(np.isnan(all_suggest[test_case][kk].values)) for kk in all_suggest[test_case])
        # TEST_CASE is left out of the compatibility check since it is singleton here
        assert xru.coord_compat((all_perf, all_suggest[test_case]), (ITER, METHOD, TRIAL))
        assert all_suggest[test_case].coords[TEST_CASE].shape == (1,), "test case should be singleton"

    return all_perf, all_time, all_suggest, all_sigs


def load_experiments(uuid_list, db_root, dbid):  # pragma: io
    """Lazily load the stored results of each experiment, one experiment per iteration.

    Parameters
    ----------
    uuid_list : list(uuid.UUID)
        UUIDs of the experiments to load.
    db_root : str
        Root location for data store as requested by the serializer used.
    dbid : str
        Name of the data store as requested by the serializer used.

    Yields
    ------
    meta_data : (str, str, str)
        Tuple of `str` with ``(test_case, optimizer, uuid)``.
    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, list(float))
        Tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is a :class:`xarray:xarray.Dataset` with the
        evaluation results with dimensions ``(ITER, SUGGEST)``, each variable is an objective. The `time_ds` is a
        :class:`xarray:xarray.Dataset` with the timing results of the form accepted by `summarize_time`. Its
        coordinates must be compatible with `perf_ds`. The `suggest_ds` is a :class:`xarray:xarray.Dataset` with the
        inputs to the function evaluations, each variable is a function input. Finally, `sig` contains the
        `test_case` signature and must be `list(float)`.
    """
    seen_hex = set()
    for uuid_ in uuid_list:
        logger.info(uuid_.hex)

        # The eval file is the reference: meta-data of the other two files must agree with it
        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)
        time_ds, time_meta = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)
        assert meta == time_meta, "meta data should between time and eval files"
        suggest_ds, suggest_meta = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)
        assert meta == suggest_meta, "meta data should between suggest and eval files"

        # Second element of the stored signature pair is what gets passed on
        _unused, sig = meta["signature"]
        logger.info(meta)
        logger.info(sig)

        # Rebuild the index labels of the combined data from the stored command line arguments
        eval_args = unserializable_dict(meta["args"])
        classifier, dataset, metric = (
            eval_args[CmdArgs.classifier],
            eval_args[CmdArgs.data],
            eval_args[CmdArgs.metric],
        )
        test_case = SklearnModel.test_case_str(classifier, dataset, metric)
        optimizer_parts = (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])
        optimizer = str_join_safe(ARG_DELIM, optimizer_parts)
        args_uuid = eval_args[CmdArgs.uuid]

        # The UUID in the meta-data must be a str matching the file and never seen before
        assert isinstance(args_uuid, str)
        assert args_uuid == uuid_.hex, "UUID meta-data does not match filename"
        assert args_uuid not in seen_hex, "uuids being reused between studies"
        seen_hex.add(args_uuid)

        # Yield key -> data pairs so the generator can be consumed in a dict like manner
        yield (test_case, optimizer, args_uuid), (perf_ds, time_ds, suggest_ds, sig)


def main():
    """Command line entry point for aggregating study results. See README for instructions on calling aggregate.
    """
    args = parse_args(agg_parser("Aggregate study results across functions and optimizers"))

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    db_root = args[CmdArgs.db_root]
    db = args[CmdArgs.db]

    # The eval, time, and suggest log results must all cover the same UUIDs
    uuid_list = XRSerializer.get_uuids(db_root, db=db, key=cc.EVAL)
    time_uuids = XRSerializer.get_uuids(db_root, db=db, key=cc.TIME)
    assert uuid_list == time_uuids, "UUID list does not match between time and eval results"
    suggest_uuids = XRSerializer.get_uuids(db_root, db=db, key=cc.SUGGEST_LOG)
    assert uuid_list == suggest_uuids, "UUID list does not match between suggest log and eval results"

    # Experiments are loaded lazily by the generator and combined as they arrive
    experiments = load_experiments(uuid_list, db_root, db)
    all_perf, all_time, all_suggest, all_sigs = concat_experiments(experiments, ravel=args[CmdArgs.ravel])

    # Check that the signatures of the combined experiments are coherent
    sig_errs, signatures_median = analyze_signatures(all_sigs)
    logger.info("Signature errors:\n%s" % sig_errs.to_string())
    print(json.dumps({"exp-agg sig errors": sig_errs.T.to_dict()}))

    # Write everything out, with one derived file per test case for the suggestions
    logger.info("saving")
    meta = {"args": serializable_dict(args), "signature": signatures_median}
    XRSerializer.save_derived(all_perf, meta, db_root, db=db, key=EVAL_RESULTS)
    XRSerializer.save_derived(all_time, meta, db_root, db=db, key=TIME_RESULTS)
    for test_case, suggest_ds in all_suggest.items():
        XRSerializer.save_derived(suggest_ds, meta, db_root, db=db, key=test_case)

    logger.info("done")


# Only run the aggregation entry point when this module is executed as a script, not on import
if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/experiment_analysis.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Perform analysis to compare different optimizers across problems.
"""
import json
import logging
import warnings
from collections import OrderedDict

import numpy as np
import xarray as xr

import bayesmark.constants as cc
import bayesmark.quantiles as qt
import bayesmark.xr_util as xru
from bayesmark.cmd_parse import CmdArgs, general_parser, parse_args, serializable_dict
from bayesmark.constants import (
    ITER,
    LB_MEAN,
    LB_MED,
    LB_NORMED_MEAN,
    METHOD,
    NORMED_MEAN,
    NORMED_MED,
    OBJECTIVE,
    PERF_BEST,
    PERF_CLIP,
    PERF_MEAN,
    PERF_MED,
    SUGGEST,
    TEST_CASE,
    TRIAL,
    UB_MEAN,
    UB_MED,
    UB_NORMED_MEAN,
)
from bayesmark.experiment_aggregate import validate_agg_perf
from bayesmark.experiment_baseline import do_baseline
from bayesmark.np_util import cummin, linear_rescale
from bayesmark.serialize import XRSerializer
from bayesmark.signatures import analyze_signature_pair
from bayesmark.stats import t_EB

# Mathematical settings
# EVAL_Q is the quantile passed to the quantile summaries and ALPHA the level passed to the error bar routines
EVAL_Q = 0.5  # Evaluate based on median loss across n_trials
ALPHA = 0.05  # ==> 95% CIs

# Module-wide logger, its level and handlers are configured in main()
logger = logging.getLogger(__name__)


def get_perf_array(evals, evals_visible):
    """Build the best-so-far curve of the actual loss, where "best" is judged by the visible loss.

    Parameters
    ----------
    evals : :class:`numpy:numpy.ndarray` of shape (n_iter, n_batch, n_trials)
        The actual loss (e.g., generalization) for a given experiment.
    evals_visible : :class:`numpy:numpy.ndarray` of shape (n_iter, n_batch, n_trials)
        The observable loss (e.g., validation) for a given experiment.

    Returns
    -------
    perf_array : :class:`numpy:numpy.ndarray` of shape (n_iter, n_trials)
        The best performance so far at iteration i from `evals`. Where the best has been selected according to
        `evals_visible`.
    """
    n_iter, _, n_trials = evals.shape
    reduced_shape = (n_iter, n_trials)

    assert evals.size > 0, "perf array not supported for empty arrays"
    assert evals_visible.shape == evals.shape
    assert not np.any(np.isnan(evals))
    assert not np.any(np.isnan(evals_visible))

    # Within each batch, pick the actual loss of the suggestion that looks best on the visible loss
    best_in_batch = np.argmin(evals_visible, axis=1)
    picker = np.expand_dims(best_in_batch, axis=1)
    actual_at_best = np.take_along_axis(evals, picker, axis=1)[:, 0, :]
    assert actual_at_best.shape == reduced_shape

    visible_at_best = np.min(evals_visible, axis=1)
    assert visible_at_best.shape == reduced_shape

    # Get the minimum from the visible loss
    return cummin(actual_at_best, visible_at_best)


def compute_aggregates(perf_da, baseline_ds, visible_perf_da=None):
    """Aggregate function evaluations in the experiments to get performance summaries of each method.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments according to true loss
        (e.g., generalization). `perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed
        to have no nan values.
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upperbound to clip poor performance when using the mean.
        `PERF_BEST` is an estimate on the global minimum.
    visible_perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments according to visible loss
        (e.g., validation). `visible_perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is
        assumed to have no nan values. If `None`, we set ``visible_perf_da = perf_da``.

    Returns
    -------
    agg_result : :class:`xarray:xarray.Dataset`
        Dataset with summary of performance for each method and test case combination. Contains variables:
        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with `LB_MED`
        and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` so we expect the optimal performance is 0,
        and random search gives 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`, `NORMED_MEAN` are for
        mean performance. Before averaging over trials, `PERF_MEAN` is clipped from above at `PERF_CLIP` and
        `NORMED_MEAN` is rescaled using `PERF_BEST` and `PERF_CLIP` and then clipped to ``[-1, 1]``.
    summary : :class:`xarray:xarray.Dataset`
        Dataset with overall summary of performance of each method. Contains variables
        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD)``. `PERF_MED` (with `LB_MED`, `UB_MED`) summarizes
        ``agg_result[NORMED_MED]`` across test cases and `PERF_MEAN` (with `LB_MEAN`, `UB_MEAN`) is the mean of
        ``agg_result[NORMED_MEAN]`` across test cases. The ``*NORMED_MEAN`` variables are the corresponding mean
        variables divided by the rescaled random search baseline mean, averaged over test cases.
    """
    validate_agg_perf(perf_da, min_trial=1)

    # Sanity check the baseline and its compatibility with the experimental results
    assert isinstance(baseline_ds, xr.Dataset)
    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)
    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)
    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)

    # Now actually get the aggregate performance numbers per test case
    agg_result = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),
        (ITER, METHOD, TEST_CASE),
    )
    # Holds the rescaled random search mean baseline, only used to build the normalizer for the summary
    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, ["ref"], (ITER, TEST_CASE)))
    # Using values here since just clearer to get raw items than xr object for func_name
    for func_name in perf_da.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        # Same checks as done when the baseline is built
        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(rand_perf_med > best_opt)
        assert np.all(rand_perf_mean > best_opt)
        assert np.all(rand_perf_mean <= base_clip_val)

        # NOTE(review): presumably maps best_opt -> 0 and base_clip_val -> 1 -- confirm against np_util.linear_rescale
        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(
            rand_perf_mean, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False
        )
        for method_name in perf_da.coords[METHOD].values:
            # Take the minimum over all suggestion at given iter + sanity check perf_da
            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)
            assert curr_da.dims == (ITER, SUGGEST, TRIAL)

            if visible_perf_da is None:
                # The actual loss doubles as the visible loss
                perf_array = get_perf_array(curr_da.values, curr_da.values)

                # Cross-check against a direct cumulative minimum, which must agree in this case
                curr_da_ = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)
                assert curr_da_.dims == (ITER, TRIAL)
                perf_array_ = np.minimum.accumulate(curr_da_.values, axis=0)
                assert np.allclose(perf_array, perf_array_)
            else:
                curr_visible_da = visible_perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)
                assert curr_visible_da.dims == (ITER, SUGGEST, TRIAL)
                perf_array = get_perf_array(curr_da.values, curr_visible_da.values)

            # Compute median perf and CI on it
            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)
            assert med_perf.shape == rand_perf_med.shape
            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf
            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB
            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Store normed mean version
            normed = linear_rescale(perf_array, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
            # Also, clip the score from below at -1 to limit max influence of single run on final average
            normed = np.clip(normed, -1.0, 1.0)
            # Average over trials (axis 1 of the (n_iter, n_trials) array)
            normed = np.mean(normed, axis=1)
            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Compute mean perf and CI on it
            # perf_array is overwritten with its clipped version here, so this must come after the normed scores
            perf_array = np.minimum(base_clip_val, perf_array)
            mean_perf = np.mean(perf_array, axis=1)
            assert mean_perf.shape == rand_perf_mean.shape
            EB = t_EB(perf_array, alpha=ALPHA, axis=1)
            assert EB.shape == rand_perf_mean.shape
            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf
            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB
            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB
    # Every entry should have been filled in by the loops above
    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)

    # Compute summary score over all test cases, summarize performance of each method
    summary = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),
        (ITER, METHOD),
    )
    # Median summary works on the normed medians, reducing over the test case dimension
    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(
        qt.quantile_and_CI,
        agg_result[NORMED_MED],
        input_core_dims=[[TEST_CASE]],
        kwargs={"q": EVAL_Q, "alpha": ALPHA},
        output_core_dims=[[], [], []],
    )

    # Mean summary works on the normed means, with error bars across test cases
    # Note: t_EB is called without alpha here so its own default applies, unlike the per test case call above
    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])
    summary[LB_MEAN] = summary[PERF_MEAN] - EB
    summary[UB_MEAN] = summary[PERF_MEAN] + EB

    # Normalize by the rescaled random search baseline averaged over test cases
    normalizer = baseline_mean_da.mean(dim=TEST_CASE)
    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer
    summary[UB_NORMED_MEAN] = summary[UB_MEAN] / normalizer

    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)
    return agg_result, summary


def main():
    """Command line entry point for analyzing aggregated studies. See README for instructions on calling analysis.

    Loads the aggregated eval results (building the baseline first if none is stored), computes the aggregates for
    every objective with `compute_aggregates`, prints and logs the scores, and saves the results as derived files.
    """
    description = "Analyze results from aggregated studies"
    args = parse_args(general_parser(description))

    # Metric used on leaderboard
    leaderboard_metric = cc.VISIBLE_TO_OPT

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # Load in the eval data and sanity check
    perf_ds, meta = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL_RESULTS)
    logger.info("Meta data from source file: %s" % str(meta["args"]))

    # Check if there is a baselines file, otherwise make one
    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]):
        warnings.warn("Baselines not found. Need to construct baseline.")
        do_baseline(args)

    # Load in baseline scores data and sanity check (including compatibility with eval data)
    baseline_ds, meta_ref = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)
    logger.info("baseline data from source ref file: %s" % str(meta_ref["args"]))

    # Check test case signatures match between eval data and baseline data
    sig_errs, signatures = analyze_signature_pair(meta["signature"], meta_ref["signature"])
    logger.info("Signature errors:\n%s" % sig_errs.to_string())
    print(json.dumps({"exp-anal sig errors": sig_errs.T.to_dict()}))

    # Subset baseline to only the test cases run in the experiments
    test_cases_run = perf_ds.coords[TEST_CASE].values.tolist()
    assert set(test_cases_run) <= set(
        baseline_ds.coords[TEST_CASE].values.tolist()
    ), "Data set contains test cases not found in baseline."
    baseline_ds = baseline_ds.sel({TEST_CASE: test_cases_run})

    # Also subset to allow shorter runs
    iters_run = perf_ds.coords[ITER].values.tolist()
    assert set(iters_run) <= set(
        baseline_ds.coords[ITER].values.tolist()
    ), "Data set not same batch size or too many iters compared to baseline."
    baseline_ds = baseline_ds.sel({ITER: iters_run})

    # Do the actual computation
    # The same visible loss is used to select the best point for every objective being scored
    perf_visible = perf_ds[cc.VISIBLE_TO_OPT]
    agg_result = OrderedDict()
    summary = OrderedDict()
    for metric_for_scoring in sorted(perf_ds):
        perf_da = perf_ds[metric_for_scoring]
        baseline_ds_ = baseline_ds.sel({OBJECTIVE: metric_for_scoring}, drop=True)
        agg_result[(metric_for_scoring,)], summary[(metric_for_scoring,)] = compute_aggregates(
            perf_da, baseline_ds_, perf_visible
        )
    # Combine the per objective results, keyed by single element tuples matching dims=(OBJECTIVE,)
    agg_result = xru.ds_concat(agg_result, dims=(cc.OBJECTIVE,))
    summary = xru.ds_concat(summary, dims=(cc.OBJECTIVE,))

    for metric_for_scoring in sorted(perf_ds):
        # Print summary by problem
        # Recall that:
        # ... summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
        # ... summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
        # Where normalizer is constant across all problems, optimizers
        print("Scores by problem (JSON):\n")
        agg_df = agg_result[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}].to_pandas().T
        print(json.dumps({metric_for_scoring: agg_df.to_dict()}))
        print("\n")

        # All scores below are taken at the last iteration
        final_score = summary[PERF_MED].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("median score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))
        final_score = summary[PERF_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("mean score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))

        # final_score holds the mean score (PERF_MEAN) at this point, so that is what gets printed
        print("Final scores (JSON):\n")
        print(json.dumps({metric_for_scoring: final_score.to_series().to_dict()}))
        print("\n")

        final_score = summary[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("normed mean score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))

    # Now saving results
    meta = {"args": serializable_dict(args), "signature": signatures}
    XRSerializer.save_derived(agg_result, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.PERF_RESULTS)

    XRSerializer.save_derived(summary, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.MEAN_SCORE)

    # Leaderboard score is based on the mean score of the leaderboard metric at the last iteration
    final_msg = xru.da_to_string(
        100 * (1.0 - summary[PERF_MEAN].sel({cc.OBJECTIVE: leaderboard_metric}, drop=True)[{ITER: -1}])
    )
    logger.info("-" * 20)
    logger.info("Final score `100 x (1-loss)` for leaderboard:\n%s" % final_msg)


# Only run the analysis entry point when this module is executed as a script, not on import
if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/experiment_baseline.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build performance baselines from aggregate results to prepare analysis.
"""
import logging
import warnings
from collections import OrderedDict

import numpy as np

import bayesmark.constants as cc
import bayesmark.expected_max as em
import bayesmark.quantiles as qt
from bayesmark.cmd_parse import CmdArgs, general_parser, parse_args
from bayesmark.constants import ARG_DELIM, ITER, METHOD, PERF_BEST, PERF_CLIP, PERF_MEAN, PERF_MED, SUGGEST, TEST_CASE
from bayesmark.experiment_aggregate import validate_agg_perf
from bayesmark.serialize import XRSerializer
from bayesmark.util import str_join_safe
from bayesmark.xr_util import ds_concat, ds_like_mixed

# Mathematical settings
# We could move these to constants to eliminate repetition but we will probably phase out anyway
EVAL_Q = 0.5  # Evaluate based on median loss across n_trials
ALPHA = 0.05  # ==> 95% CIs
# Next float after 0 in the direction of 1, used as a floor on abs(best_opt) when padding the best performance
MIN_POS = np.nextafter(0, 1)
# Multiplier on the float spacing at the best performance, sets how far below it the global min estimate is placed
PAD_FACTOR = 10000

# Module-wide logger, its level and handlers are configured in main()
logger = logging.getLogger(__name__)


def validate(baseline_ds):
    """Run the per test case baseline checks that the analysis step will also apply.

    Parameters
    ----------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance in variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)``.

    Raises
    ------
    AssertionError
        If, for any test case, a baseline curve increases with iteration, is not strictly above `PERF_BEST`, or the
        mean baseline exceeds `PERF_CLIP`.
    """
    var_names = (PERF_MED, PERF_MEAN, PERF_BEST, PERF_CLIP)
    for func_name in baseline_ds.coords[TEST_CASE].values:
        # Raw values of each baseline variable for this test case
        curr = {kk: baseline_ds[kk].sel({TEST_CASE: func_name}, drop=True).values for kk in var_names}
        med_curve, mean_curve = curr[PERF_MED], curr[PERF_MEAN]
        global_best, clip_level = curr[PERF_BEST], curr[PERF_CLIP]

        for curve in (med_curve, mean_curve):
            assert np.all(np.diff(curve) <= 0), "Baseline should be decreasing with iteration"
        for curve in (med_curve, mean_curve):
            assert np.all(curve > global_best)
        assert np.all(mean_curve <= clip_level)


def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values. At least one of the
        methods must be a random search, identified by its name starting with `cc.RANDOM_SEARCH` followed by the
        argument delimiter.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upperbound to clip poor performance when using the mean.
        `PERF_BEST` is an estimate on the global minimum.
    """
    validate_agg_perf(perf_da)

    # Random search methods are recognized by name prefix, all matching methods are pooled together below
    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        # Pool every random search evaluation on this problem into one flat sample
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        # With one suggestion per batch, the first median baseline entry should match the clip level
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        # The spacing is taken at a negative number, so pad is negative and moves best_opt downward. MIN_POS keeps the
        # argument away from exactly zero.
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        # Clip the random search evaluations from above before computing the mean baseline
        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)

        # rand_perf is reused here, now for the mean baseline. The clip and cumulative min below are expected to be
        # no-ops up to numerics, which the allclose asserts verify.
        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed
    # Every entry should have been filled in by the loop above
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds


def do_baseline(args):  # pragma: io
    """Compute the baselines from stored eval results and save them, without going through the actual main.

    Parameters
    ----------
    args : dict
        Parsed arguments indexed by `CmdArgs`. Only the ``db_root`` and ``db`` entries are read here.
    """
    db_root = args[CmdArgs.db_root]
    db = args[CmdArgs.db]

    # Pull the eval results back in, and log where they came from
    perf_ds, meta = XRSerializer.load_derived(db_root, db=db, key=cc.EVAL_RESULTS)
    logger.info("Meta data from source file: %s" % str(meta["args"]))

    # One baseline per variable in the performance data set, keyed by a 1-tuple so it can be concatenated
    D = OrderedDict(((objective,), compute_baseline(perf_ds[objective])) for objective in perf_ds)
    baseline_ds = ds_concat(D, dims=(cc.OBJECTIVE,))

    # Re-select so the objectives come out in the same order as the input
    baseline_ds = baseline_ds.sel({cc.OBJECTIVE: list(perf_ds)})
    assert list(perf_ds) == baseline_ds.coords[cc.OBJECTIVE].values.tolist()

    # Round-trip check: each slice must match what went in. Could be dropped once test coverage is trusted.
    for (objective,), expected in D.items():
        assert baseline_ds.sel({cc.OBJECTIVE: objective}, drop=True).identical(expected)

    # Persist the result alongside the original meta data
    XRSerializer.save_derived(baseline_ds, meta, db_root, db=db, key=cc.BASELINE)


def main():
    """See README for instructions on calling baseline.
    """
    parser = general_parser("Aggregate the baselines for later analysis in benchmark")
    args = parse_args(parser)

    # This is the module-wide logger; only attach a stream handler when asked to be verbose
    logger.setLevel(logging.INFO)
    verbose = args[CmdArgs.verbose]
    if verbose:
        logger.addHandler(logging.StreamHandler())

    do_baseline(args)
    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/experiment_db_init.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tool to create a new database for results. This is just a wrapper on the serializer init call.
"""
import logging

import bayesmark.cmd_parse as cmd
from bayesmark.cmd_parse import CmdArgs
from bayesmark.constants import EXP_VARS
from bayesmark.serialize import XRSerializer

EXIST_OK = True

logger = logging.getLogger(__name__)


def main():
    """See README for instructions on calling db_init.
    """
    parser = cmd.general_parser("Initialize the directories for running the experiments")
    args = cmd.parse_args(parser)

    # The whole point of this tool is to create directories, so a dry run is rejected outright
    assert not args[CmdArgs.dry_run], "Dry run doesn't make any sense when building dirs"

    # This is the module-wide logger; only attach a stream handler when asked to be verbose
    logger.setLevel(logging.INFO)
    verbose = args[CmdArgs.verbose]
    if verbose:
        logger.addHandler(logging.StreamHandler())

    XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=EXIST_OK)

    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/experiment_launcher.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Launch studies in separate processes, or do a dry run to build a jobs file with lists of commands to run.
"""
import json
import logging
import random as pyrandom
import uuid as pyuuid
import warnings
from itertools import product
from subprocess import TimeoutExpired, call

import numpy as np

import bayesmark.cmd_parse as cmd
from bayesmark.builtin_opt.config import CONFIG
from bayesmark.cmd_parse import CMD_STR, CmdArgs, serializable_dict
from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, EXP_VARS, METRICS, MODEL_NAMES, PY_INTERPRETER
from bayesmark.data import METRICS_LOOKUP, get_problem_type
from bayesmark.np_util import random as np_random
from bayesmark.np_util import random_seed, strat_split
from bayesmark.path_util import absopen
from bayesmark.serialize import XRSerializer
from bayesmark.util import range_str, shell_join, str_join_safe, strict_sorted

# How much of uuid to put in job name to avoid name clashes
UUID_JOB_CHARS = 7
# Warning: this name is also specified in setup.py, and violates the DRY principle. So if it gets changed in setup.py,
# it must also be changed here!
EXPERIMENT_ENTRY = "bayesmark-exp"

logger = logging.getLogger(__name__)


def _is_arg_safe(ss):
    """Check if `str` is safe as argument to `argparse`."""
    if len(ss) == 0:
        return False
    safe = ss[0] != "-"
    return safe


def arg_safe_str(val):
    """Convert `val` to `str` and return it, raising `ValueError` if the result is not safe to hand to `argparse`."""
    text = str(val)
    if _is_arg_safe(text):
        return text
    raise ValueError("%s is not safe for argparse" % text)


def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    One command is produced for every combination of repetition, classifier, data set, optimizer, and each metric
    that applies to the problem type of the data set.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer, metric)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing command and arguments to run a process with experiment. Join with whitespace or use
        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means it
        will default to ``.`` if the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.

    Raises
    ------
    ValueError
        If any argument value is not safe to pass to `argparse` (see :func:`arg_safe_str`).
    """
    args_to_pass_thru = [CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db]
    # This could be made simpler and avoid if statement if we just always pass dataroot, even if no custom data used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure. When the user did not restrict a
    # dimension (its argument is None), every known option for that dimension is used.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None
        else args[CmdArgs.optimizer]
    )
    assert all(
        ((optimizer in opt_file_lookup) or (optimizer in CONFIG)) for optimizer in o_list
    ), "unknown optimizer in optimizer list"

    # For each problem type, the (sorted) requested metrics that apply to it. Every data set must end up with at
    # least one metric for its problem type.
    m_set = set(METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in METRICS_LOOKUP.items()}
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list
    ), "At one metric needed for each problem type of data sets"

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list, o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible string (conditioned on having the same run uuid), which should also never give
            # a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the argument list for subproc, passing some args thru
            cmd_args_pass_thru = [[CMD_STR[vv][0], arg_safe_str(args[vv])] for vv in args_to_pass_thru]
            # Technically, the optimizer is not actually needed here for non-built in optimizers because it is
            # already specified via the entry point: optimizer_wrapper_file
            cmd_args = [
                [CMD_STR[CmdArgs.classifier][0], arg_safe_str(classifier)],
                [CMD_STR[CmdArgs.data][0], arg_safe_str(data)],
                [CMD_STR[CmdArgs.optimizer][0], arg_safe_str(optimizer)],
                [CMD_STR[CmdArgs.uuid][0], arg_safe_str(sub_uuid)],
                [CMD_STR[CmdArgs.metric][0], arg_safe_str(metric)],
            ]
            # Flatten the [flag, value] pairs into a single flat tuple of strings
            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY,)
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(".py"), "optimizer wrapper should a be .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safe again: odd-indexed elements (the values) must be arg safe, while even-indexed elements
            # (the option strings taken from CMD_STR) must not be.
            assert all((_is_arg_safe(ss) == (ii % 2 == 1)) for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd


def dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):
    """Write to buffer description of commands for running all experiments.

    The commands are grouped by optimizer and then split into jobs so that each job line holds a roughly even mix of
    optimizers. Each job line has the form ``job_<uuid prefix>_<index> cmd1 && cmd2 && ...``.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    fp : writable buffer
        File handle to write out sequence of commands to execute (broken into jobs on each line) to execute all the
        experiments (possibly each job in parallel).
    random : RandomState
        Random stream to use for reproducibility.
    """
    assert args[CmdArgs.n_jobs] > 0, "Must have non-zero jobs for dry run"

    # A file pointer is taken (rather than a path) so this can be tested without touching an actual file.
    manual_setup_info = XRSerializer.init_db_manual(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS)
    warnings.warn(manual_setup_info, UserWarning)

    # Collect the shell command strings, grouped by optimizer
    cmds_by_optimizer = {}
    for iteration_key, full_cmd in gen_commands(args, opt_file_lookup, run_uuid):
        _, _, _, optimizer, _ = iteration_key
        cmd_str = shell_join(full_cmd)
        if optimizer not in cmds_by_optimizer:
            cmds_by_optimizer[optimizer] = []
        cmds_by_optimizer[optimizer].append(cmd_str)
    grouped_cmds = list(cmds_by_optimizer.values())
    all_cmds = [cmd_str for group in grouped_cmds for cmd_str in group]

    # Never ask for more jobs than commands, since an empty job is a waste
    n_jobs = min(args[CmdArgs.n_jobs], len(all_cmds))

    # Would prob also work with pyrandom, but only tested np random so far
    subcommands = strat_split(grouped_cmds, n_jobs, random=random)
    # The split must be a pure re-arrangement of the commands, delete once we trust strat_split
    assert sorted(np.concatenate(subcommands)) == sorted(all_cmds)

    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]

    # Leading comment lines record how the file was generated, for reproducibility
    args_str = serializable_dict(args)
    fp.write("# running: %s\n" % str(args_str))
    fp.write("# cmd: %s\n" % cmd.cmd_str())
    for ii, ii_str in range_str(n_jobs):
        job_cmds = subcommands[ii]
        assert len(job_cmds) > 0
        fp.write("job_%s_%s %s\n" % (job_suffix, ii_str, " && ".join(job_cmds)))


def real_run(args, opt_file_lookup, run_uuid, timeout=None):  # pragma: io
    """Run sequence of independent experiments to fully run the benchmark.

    This uses `subprocess` to launch a separate process (in serial) for each experiment. An experiment that exceeds
    `timeout` is logged and skipped; an experiment that exits with a non-zero status aborts the whole run.

    Note that ``args[CmdArgs.db]`` is overwritten in place with the database name returned by the serializer.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    timeout : int
        Max seconds per experiment. Use `None` for no limit.

    Raises
    ------
    ChildProcessError
        If any experiment process returns a non-zero status code.
    """
    args[CmdArgs.db] = XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=True)
    logger.info("Supply --db %s to append to this experiment or reproduce jobs file." % args[CmdArgs.db])

    # Track completed and timed-out studies separately, so that the summary does not report timeouts as successes.
    n_success = 0
    n_timeout = 0

    # Get and run the commands in a sub-process
    G = gen_commands(args, opt_file_lookup, run_uuid)
    for _, full_cmd in G:
        try:
            status = call(full_cmd, shell=False, cwd=args[CmdArgs.optimizer_root], timeout=timeout)
        except TimeoutExpired:
            # A timeout is deliberately non-fatal: record it and move on to the next experiment.
            logger.info(f"Experiment timeout after {timeout} seconds.")
            print(json.dumps({"experiment_timeout_exception": " ".join(full_cmd)}))
            n_timeout += 1
            continue

        if status != 0:
            raise ChildProcessError("status code %d returned from:\n%s" % (status, " ".join(full_cmd)))
        n_success += 1

    logger.info(f"Benchmark script ran {n_success} studies successfully.")
    if n_timeout > 0:
        logger.info(f"Benchmark script stopped {n_timeout} studies that exceeded the timeout.")


def main():
    """See README for instructions on calling launcher.
    """
    parser = cmd.launcher_parser("Launch series of studies across functions and optimizers")
    args = cmd.parse_args(parser)

    # This is the module-wide logger; only attach a stream handler when asked to be verbose
    logger.setLevel(logging.INFO)
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # The optimizer settings say which wrapper file to call for each optimizer; the second entry is not needed here
    settings = cmd.load_optimizer_settings(args[CmdArgs.optimizer_root])
    opt_file_lookup = {}
    for optimizer, (wrapper_file, _) in settings.items():
        opt_file_lookup[optimizer] = wrapper_file

    # Setup uuid: draw a fresh one unless the user forced a particular value
    if args[CmdArgs.uuid] is not None:
        warnings.warn(
            "User UUID supplied. This is only desired for debugging. Careless use could lead to study id conflicts.",
            UserWarning,
        )
    else:
        args[CmdArgs.uuid] = pyuuid.uuid4().hex  # debatable if uuid1 or uuid4 is better here
    run_uuid = pyuuid.UUID(hex=args[CmdArgs.uuid])
    assert run_uuid.hex == args[CmdArgs.uuid]
    logger.info("Supply --uuid %s to reproduce this run." % run_uuid.hex)

    # Echo all the options as JSON
    launch_args = {"bayesmark-launch-args": cmd.serializable_dict(args)}
    print("Launcher options (JSON):\n")
    print(json.dumps(launch_args))
    print("\n")

    # Master seed: python's rng is seeded from the uuid, and the numpy rng is then seeded from python's rng
    pyrandom.seed(run_uuid.int)
    np.random.seed(random_seed(pyrandom))

    # Either execute the sub-processes for real, or only write out the jobs file
    if not args[CmdArgs.dry_run]:
        # A non-positive timeout means no timeout at all
        timeout = args[CmdArgs.timeout] if args[CmdArgs.timeout] > 0 else None
        real_run(args, opt_file_lookup, run_uuid, timeout)
    else:
        with absopen(args[CmdArgs.jobs_file], "w") as fp:
            dry_run(args, opt_file_lookup, run_uuid, fp)

    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main


================================================
FILE: bayesmark/np_util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities that could be included in `numpy` but aren't.
"""
import numpy as np

# np seed must be in [0, 2**32 - 1] = [0, uint32 max]
SEED_MAX_INCL = np.iinfo(np.uint32).max

# Access default numpy rng in way that is short and sphinx friendly
random = np.random.random.__self__


def random_seed(random=random):
    """Draw a random seed compatible with :class:`numpy:numpy.random.RandomState`.

    Parameters
    ----------
    random : :class:`numpy:numpy.random.RandomState`
        Random stream to use to draw the random seed.

    Returns
    -------
    seed : int
        Seed for a new random stream in ``[0, 2**32-1)``.
    """
    # The upper limit is exclusive for np randint but inclusive for py randint. Passing the largest valid seed as
    # the limit is therefore in range for both, at the cost of np never drawing the value 2**32-1 itself.
    return random.randint(0, SEED_MAX_INCL)


def shuffle_2d(X, random=random):
    """Generalization of :func:`numpy:numpy.random.shuffle` of 2D array.

    The shuffling is done in-place on `X`, and nothing is returned.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, m)
        Array-like 2D data to shuffle in place. Shuffles order of rows and order of elements within a row.
    random : :class:`numpy:numpy.random.RandomState`
        Random stream used for the shuffling.
    """
    # First permute the rows as a whole ...
    random.shuffle(X)
    # ... then permute the elements inside every row, one row at a time.
    for row in X:
        random.shuffle(row)


def strat_split(X, n_splits, inplace=False, random=random):
    """Make a stratified random split of items.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, m)
        Data we would like to split randomly into groups. We should get the same number +/-1 of elements from each row
        in each group.
    n_splits : int
        How many groups we want to split into.
    inplace : bool
        If true, this function will cause in place modifications to `X`.
    random : :class:`numpy:numpy.random.RandomState`
        Random stream to use for reproducibility.

    Returns
    -------
    Y : list(:class:`numpy:numpy.ndarray`)
        Stratified split of `X` where each row of `Y` contains the same number +/-1 of elements from each row of `X`.
        Must be a list of arrays since each row may have a different length.
    """
    # Arguably, this function could go in stats
    assert np.ndim(X) == 2
    assert n_splits > 0

    # Work on a private copy unless the caller explicitly allows `X` to be modified
    work = X if inplace else np.array(X, copy=True)

    shuffle_2d(work, random=random)
    # Column-major flattening (like work.T.ravel()) interleaves the rows before cutting into contiguous chunks
    interleaved = np.ravel(work, order="F")
    Y = np.array_split(interleaved, n_splits)

    # Just for good measure make sure this is shuffled too, prob not needed.
    shuffle_2d(Y, random=random)
    return Y


def isclose_lte(x, y):
    """Check that less than or equal to (lte, ``x <= y``) is approximately true between all elements of `x` and `y`.

    This plays the role for ``<=`` that :func:`numpy:numpy.isclose` plays for equality. Shapes of all input
    variables must be broadcast compatible.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Lower limit in ``<=`` check.
    y : :class:`numpy:numpy.ndarray`
        Upper limit in ``<=`` check.

    Returns
    -------
    lte : bool
        True if ``x <= y`` is approximately true element-wise.
    """
    # np.less_equal (rather than the <= operator) so the result is always a np type
    strictly_ok = np.less_equal(x, y)
    nearly_equal = np.isclose(x, y)
    return strictly_ok | nearly_equal


def clip_chk(x, lb, ub, allow_nan=False):
    """Clip all element of `x` to be between `lb` and `ub` like :func:`numpy:numpy.clip`, but also check
    :func:`numpy:numpy.isclose`.

    That is, `x` must already be within the bounds up to numerical tolerance; the clipping only removes slight
    violations. Shapes of all input variables must be broadcast compatible.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Array containing elements to clip.
    lb : :class:`numpy:numpy.ndarray`
        Lower limit in clip.
    ub : :class:`numpy:numpy.ndarray`
        Upper limit in clip.
    allow_nan : bool
        If true, we allow ``nan`` to be present in `x` without out raising an error.

    Returns
    -------
    x : :class:`numpy:numpy.ndarray`
        An array with the elements of `x`, but where values < `lb` are replaced with `lb`, and those > `ub` with `ub`.
    """
    assert np.all(lb <= ub)  # np.clip does not do this check

    x = np.asarray(x)

    # These are asserts not exceptions since clip_chk most used internally. The lower bound is checked first, then
    # the upper bound.
    for lower, upper in ((lb, x), (x, ub)):
        within = isclose_lte(lower, upper)
        if allow_nan:
            within = within | np.isnan(x)
        assert np.all(within)

    return np.clip(x, lb, ub)


def snap_to(x, fixed_val=None):
    """Snap input `x` to the `fixed_val` unless `fixed_val` is `None`, where `x` is returned.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Array containing elements to snap.
    fixed_val : :class:`numpy:numpy.ndarray` or None
        Values to be returned if `x` is close, otherwise an error is raised. If `fixed_val` is `None`, `x` is returned.

    Returns
    -------
    fixed_val : :class:`numpy:numpy.ndarray`
        Snapped to value of `x`, broadcast to the shape of `x`.

    Raises
    ------
    ValueError
        If `x` is neither equal nor close to `fixed_val`.
    """
    if fixed_val is None:
        return x

    # Exact equality is tried first to cover discrete types where allclose doesn't work
    matches = np.all(x == fixed_val) or np.allclose(x, fixed_val)
    if not matches:
        raise ValueError("Expected fixed value %s, got %s." % (repr(fixed_val), repr(x)))

    return np.broadcast_to(fixed_val, np.shape(x))


def linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True):
    """Linearly transform all elements of `X`, bounded between `lb0` and `ub0`, to be between `lb1` and `ub1`.

    Shapes of all input variables must be broadcast compatible.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray`
        Array containing elements to rescale.
    lb0 : :class:`numpy:numpy.ndarray`
        Current lower bound of `X`.
    ub0 : :class:`numpy:numpy.ndarray`
        Current upper bound of `X`. Must be strictly larger than `lb0`.
    lb1 : :class:`numpy:numpy.ndarray`
        Desired lower bound of `X`.
    ub1 : :class:`numpy:numpy.ndarray`
        Desired upper bound of `X`. May be equal to `lb1`.
    enforce_bounds : bool
        If True, perform input bounds check (and clipping if slight violation) on the input `X` and again on the
        output. This argument is not meant to be vectorized like the other input variables.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray`
        Elements of input `X` after linear rescaling.
    """
    # All four bounds must be finite, and the source interval must have non-zero width to avoid dividing by zero
    for bound in (lb0, lb1, ub0, ub1):
        assert np.all(np.isfinite(bound))
    assert np.all(lb0 < ub0)
    assert np.all(lb1 <= ub1)

    slope = np.true_divide(ub1 - lb1, ub0 - lb0)
    assert np.all(slope >= 0)

    if not enforce_bounds:
        return slope * (X - lb0) + lb1

    X = clip_chk(X, lb0, ub0)  # This will flag any non-finite X input.
    return clip_chk(slope * (X - lb0) + lb1, lb1, ub1)


def argmin_2d(X):
    """Return the ``(row, column)`` index of the minimum element of the non-empty 2D array `X`."""
    assert X.size > 0, "argmin of empty array not defined"

    # argmin works on the flattened array, so map the flat position back to 2D coordinates
    flat_pos = X.argmin()
    row, col = np.unravel_index(flat_pos, X.shape)
    return row, col


def cummin(x_val, x_key):
    """Get the cumulative minimum of `x_val` when ranked according to `x_key`.

    For every position along axis 0, this picks the element of `x_val` at the row where `x_key` last reached its
    running minimum, independently for each column.

    Parameters
    ----------
    x_val : :class:`numpy:numpy.ndarray` of shape (n, d)
        The array to get the cumulative minimum of along axis 0.
    x_key : :class:`numpy:numpy.ndarray` of shape (n, d)
        The array for ranking elements as to what is the minimum.

    Returns
    -------
    c_min : :class:`numpy:numpy.ndarray` of shape (n, d)
        The cumulative minimum array.
    """
    assert x_val.shape == x_key.shape
    assert x_val.ndim == 2
    assert not np.any(np.isnan(x_key)), "cummin not defined for nan key"

    n_rows = x_val.shape[0]
    row_index = np.arange(n_rows)[:, None]

    # Rows whose key ties or beats the running minimum keep their own row index, all other rows get zero
    running_min = np.minimum.accumulate(x_key, axis=0)
    is_new_min = x_key <= running_min
    # Carrying the largest such index forward gives, per row, the most recent row that set the minimum
    idx = np.maximum.accumulate(is_new_min * row_index)

    return np.take_along_axis(x_val, idx, axis=0)


================================================
FILE: bayesmark/path_util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities handy for manipulating paths that have extra checks not included in `os.path`.
"""
import os.path
import warnings


def abspath(path, verify=True):  # pragma: io
    """Combo of :func:`os.path.abspath` and :func:`os.path.expanduser` that will also check existence of directory.

    Parameters
    ----------
    path : str
        Relative path string that can also contain home directories, e.g., ``"~/git/"``.
    verify : bool
        If true, verifies that the directory exists. Raises an assertion failure if it does not exist.

    Returns
    -------
    path : str
        Absolute version of input path.
    """
    # The home directory must be expanded before making the path absolute
    resolved = os.path.abspath(os.path.expanduser(path))
    if not verify:
        return resolved
    assert os.path.isdir(resolved), "directory does not exist: %s" % resolved
    return resolved


def absopen(path, mode):  # pragma: io
    """Safe version of the built in :func:`open` that only opens absolute paths.

    Parameters
    ----------
    path : str
        Absolute path. An assertion failure is raised if it is not absolute.
    mode : str
        Open mode, any mode understood by the built in :func:`open`, e.g., ``"r"`` or ``"w"``.

    Returns
    -------
    f : file handle
        File handle open to use.
    """
    # Refuse relative paths, whose meaning would depend on the current working directory
    assert os.path.isabs(path), "Only allowing opening of absolute paths for safety."
    return open(path, mode)


def _join_safe(*args):  # pragma: io
    """Helper routine with commonalities between `join_safe_r` and `join_safe_w`.

    All arguments but the last are joined into a directory, which must exist. The last argument must be a bare file
    name, and is appended to the absolute version of that directory.
    """
    assert len(args) >= 2
    *dir_parts, fname = args

    # Put together the dir, then make sure it is abs and exists
    dir_path = abspath(os.path.join(*dir_parts), verify=True)

    # The file name may not smuggle in any directory components of its own
    assert os.path.basename(fname) == fname, "Expected basename got %s" % fname
    # Could check abs again if really wanted to be safe
    return os.path.join(dir_path, fname)


def join_safe_r(*args):  # pragma: io
    """Safe version of :func:`os.path.join` that checks resulting path is absolute and the file exists for reading.

    Parameters
    ----------
    *args : str
        varargs for parts of path to combine. The last argument must be a file name.

    Returns
    -------
    fname : str
        Absolute path to filename.
    """
    full_path = _join_safe(*args)
    # Reading only makes sense if the file is already there
    assert os.path.isfile(full_path)
    return full_path


def join_safe_w(*args):  # pragma: io
    """Safe version of :func:`os.path.join` that checks resulting path is absolute.

    Since the path is meant for writing, an already existing file only triggers a warning and is not an error.

    Parameters
    ----------
    *args : str
        varargs for parts of path to combine. The last argument must be a file name.

    Returns
    -------
    fname : str
        Absolute path to filename.
    """
    full_path = _join_safe(*args)
    already_there = os.path.isfile(full_path)
    if already_there:
        # Only a heads up: the caller is presumably about to overwrite this file
        warnings.warn("file already exists: %s" % full_path, RuntimeWarning)
    return full_path


================================================
FILE: bayesmark/quantiles.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute quantiles and confidence intervals.
"""
import numpy as np
import scipy.stats as ss

from bayesmark.np_util import isclose_lte


def ensure_shape(x, y):
    """Util to broadcast one var to the shape of another, but only when the shapes differ.

    When the shapes already agree, `x` itself is returned, so a scalar is not converted into an array type
    unnecessarily.
    """
    target_shape = np.shape(y)
    return x if np.shape(x) == target_shape else np.broadcast_to(x, target_shape)


def order_stats(X):
    """Compute order statistics on sample `X`.

    Follows convention that order statistic 1 is minimum and statistic n is maximum. Therefore, array elements ``0``
    and ``n+1`` are ``-inf`` and ``+inf``.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for order statistics. Can be vectorized, in which case the last axis is the sample axis. Must not
        contain NaN.

    Returns
    -------
    o_stats : :class:`numpy:numpy.ndarray` of shape (n+2,)
        Order statistics on `X`.
    """
    assert np.ndim(X) >= 1
    # NaN is not allowed since it does not have well defined order.
    assert not np.any(np.isnan(X))

    # One extra element at each end of the last axis, matching all leading axes of X
    pad_shape = np.shape(X)[:-1] + (1,)
    lower_pad = np.full(pad_shape, -np.inf)
    upper_pad = np.full(pad_shape, np.inf)

    return np.concatenate((lower_pad, np.sort(X, axis=-1), upper_pad), axis=-1)


def _quantile(n, q):
    idx = np.ceil(n * q).astype(int)
    return idx


def quantile(X, q):
    """Computes `q` th quantile of `X`.

    Similar to :func:`numpy:numpy.percentile` except that it matches the mathematical definition of a quantile *and*
    `q` is scaled in (0,1) rather than (0,100).

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
        Any array-like (e.g., a `list`) is accepted.
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Empirical `q` quantile from sample `X`.
    """
    assert np.ndim(X) >= 1
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Currently don't support broadcasting both at same time
    assert np.ndim(X) == 1 or np.ndim(q) == 0

    # np.shape (not X.shape) so that array-like input, which the checks above already accept, also works here
    n = np.shape(X)[-1]
    idx = _quantile(n, q)

    # idx indexes the padded order statistics, where element 0 is -inf and element n is the sample maximum
    o_stats = order_stats(X)
    estimate = o_stats[..., idx]
    return estimate


def _quantile_CI(n, q, alpha):
    """Positions in the padded order statistics bracketing the `q` quantile at level `alpha`.

    Both positions come from the tails of a Binomial(`n`, `q`) distribution (via ``ss.binom``), with ``alpha / 2``
    assigned to each tail. Every step is sanity checked with assertions, so an `AssertionError` is raised if the
    computed positions do not have the expected tail probabilities or fall outside ``[0, n + 1]``.

    Returns
    -------
    idx_lower, idx_upper
        Integer positions of the lower and upper end of the interval.
    """
    tail = alpha / 2.0

    # fmax clips at 0, use in case there is -inf case from being at extreme of distn
    idx_lower = np.fmax(0, ss.binom.ppf(tail, n, q)).astype(int)
    cdf_below_lower = ss.binom.cdf(idx_lower - 1, n, q)
    assert np.all(isclose_lte(cdf_below_lower, tail))
    assert np.all(isclose_lte(tail, ss.binom.cdf(idx_lower, n, q)))
    assert np.all(0 <= idx_lower) and np.all(idx_lower <= n + 1)

    idx_upper = np.fmax(0, ss.binom.isf(tail, n, q)).astype(int) + 1
    assert np.all(isclose_lte(ss.binom.sf(idx_upper - 1, n, q), tail))
    assert np.all(isclose_lte(tail, ss.binom.sf(idx_upper - 2, n, q)))
    cdf_below_upper = ss.binom.cdf(idx_upper - 1, n, q)
    assert np.all(isclose_lte(1 - tail, cdf_below_upper))
    assert np.all(isclose_lte(ss.binom.cdf(idx_upper - 2, n, q), 1 - tail))
    assert np.all(0 <= idx_upper) and np.all(idx_upper <= n + 1)

    # Probability mass captured between the two positions must reach the nominal coverage
    coverage = cdf_below_upper - cdf_below_lower
    assert np.all(isclose_lte(1.0 - alpha, coverage))

    return idx_lower, idx_upper


def quantile_CI(X, q, alpha=0.05):
    """Nonparametric confidence interval on the `q` quantile, built from the order statistics of `X`.

    Because order statistics are discrete, the realized alpha level is at most `alpha`.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    assert np.ndim(X) >= 1
    # The edge cases (0 and 1) are not handled for now
    assert np.all(q > 0) and np.all(q < 1)
    assert np.all(alpha > 0) and np.all(alpha < 1)
    # Vectorizing over X and over (q, alpha) simultaneously is not supported
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(alpha) == 0)

    sample_size = X.shape[-1]
    idx_lower, idx_upper = _quantile_CI(sample_size, q, alpha)

    padded = order_stats(X)
    LB = padded[..., idx_lower]
    UB = padded[..., idx_upper]
    return LB, UB


def max_quantile_CI(X, q, m, alpha=0.05):
    """Calculate CI on `q` quantile of distribution on max of `m` iid samples using a data set `X`.

    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the
    discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    m : int
        Compute statistics for distribution on max over `m` samples. Must be ``>= 1``. Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Best estimate on `q` quantile on max over `m` iid samples.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # X itself is validated inside order_stats below.
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Could check int but if someone wants to interpolate, we will let them.
    assert np.all(m >= 1)
    # Currently don't support broadcasting both at same time: when X is vectorized, each of q, m, and alpha must be
    # a scalar (m has to be included here since it gets folded into q below).
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(m) == 0 and np.ndim(alpha) == 0)

    # The q quantile of the max over m iid draws is the q^(1/m) quantile of a single draw
    q = q ** (1.0 / m)
    o_stats = order_stats(X)

    n = X.shape[-1]
    idx = _quantile(n, q)
    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)

    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]
    # Might need to broadcast estimate out if vectorization is in alpha
    estimate = ensure_shape(o_stats[..., idx], LB)
    return estimate, LB, UB


def min_quantile_CI(X, q, m, alpha=0.05):
    """Calculate confidence interval on `q` quantile of distribution on min of `m` iid samples using a data set `X`.

    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the
    discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    m : int
        Compute statistics for distribution on min over `m` samples. Must be ``>= 1``. Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Best estimate on `q` quantile on min over `m` iid samples.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # X itself is validated inside order_stats below.
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Could check int but if someone wants to interp, we will let them.
    assert np.all(m >= 1)
    # Currently don't support broadcasting both at same time: when X is vectorized, each of q, m, and alpha must be
    # a scalar (m has to be included here since it gets folded into q below).
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(m) == 0 and np.ndim(alpha) == 0)

    # The q quantile of the min over m iid draws is the 1 - (1 - q)^(1/m) quantile of a single draw.
    # This might have numerics issues for small q
    q = 1.0 - (1.0 - q) ** (1.0 / m)
    o_stats = order_stats(X)

    n = X.shape[-1]
    idx = _quantile(n, q)
    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)

    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]
    # Might need to broadcast estimate out if vectorization is in alpha
    estimate = ensure_shape(o_stats[..., idx], LB)
    return estimate, LB, UB


def quantile_and_CI(X, q, alpha=0.05):
    """Empirical `q` quantile of `X` together with a nonparametric CI on it from order statistics.

    The realized alpha level is at most `alpha` due to the discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Empirical `q` quantile from sample `X`.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # Thin wrapper: this is the max-of-m routine evaluated with m=1.
    return max_quantile_CI(X, q=q, m=1, alpha=alpha)


================================================
FILE: bayesmark/random_search.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A baseline random search in our standardized optimizer interface. Useful for baselines.
"""
import numpy as np

from bayesmark import np_util
from bayesmark.space import JointSpace


def suggest_dict(X, y, meta, n_suggestions=1, random=np_util.random):
    """Stateless random search: propose the next query points without looking at past observations.

    This implements the API for general structures of different data types.

    Parameters
    ----------
    X : list(dict)
        Places where the objective function has already been evaluated. Only validated, otherwise unused in random
        search.
    y : :class:`numpy:numpy.ndarray`, shape (n,)
        Corresponding values where objective has been evaluated. Only validated, otherwise unused in random search.
    meta : dict(str, dict)
        Configuration of the optimization variables. See API description.
    n_suggestions : int
        Desired number of parallel suggestions in the output
    random : :class:`numpy:numpy.random.RandomState`
        Optionally pass in random stream for reproducibility.

    Returns
    -------
    next_guess : list(dict)
        List of `n_suggestions` suggestions to evaluate the objective function.
        Each suggestion is a dictionary where each key corresponds to a parameter being optimized.
    """
    # Move the history into the warped space and validate it against the bounds of that space
    joint_space = JointSpace(meta)
    warped_history = joint_space.warp(X)
    warped_bounds = joint_space.get_bounds()
    _, n_dims = _check_x_y(warped_history, y, allow_impute=True)
    lower, upper = _check_bounds(warped_bounds, n_dims)

    # Uniform draws inside the warped box, one row per suggestion
    draws = random.uniform(lower, upper, size=(n_suggestions, n_dims))

    # Map the draws back to the original variable types
    return joint_space.unwarp(draws)


def _check_x_y(X, y, allow_impute=False):  # pragma: validator
    """Input validation for `suggest` routine."""
    if not (np.ndim(X) == 2):
        raise ValueError("X must be 2-dimensional got %s." % str(np.shape(X)))
    n_obs, n_params = np.shape(X)

    assert n_params >= 1, "We do not support suggest on empty space."

    if not (np.shape(y) == (n_obs,)):
        raise ValueError("y must be %s not %s." % (str((n_obs,)), str(np.shape(y))))

    if not np.all(np.isfinite(X)):
        raise ValueError("X must be finite.")

    n_real_obs = n_obs
    if allow_impute:
        if not np.all(np.isfinite(y) | np.isnan(y)):
            raise ValueError("y can't contain infs even with data imputation.")
        n_real_obs = np.sum(np.isfinite(y))
    else:
        if not np.all(np.isfinite(y)):
            raise ValueError("y must be finite when data imputation not used.")

    return n_real_obs, n_params


def _check_bounds(bounds, n_params):  # pragma: validator
    """Input validation for `suggest` routine."""
    if not (np.shape(bounds) == (n_params, 2)):
        raise ValueError("bounds must have shape %s not %s." % (str((n_params, 2)), str(np.shape(bounds))))

    lb, ub = np.asarray(bounds).T
    if not (np.all(np.isfinite(lb)) and np.all(np.isfinite(ub))):
        raise ValueError("bounds must be finite.")
    if not (np.all(lb <= ub)):
        raise ValueError("lower bound must be less than upper bound.")
    return lb, ub


================================================
FILE: bayesmark/serialize.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A serialization abstraction layer (SAL) to save and load experimental results. All IO of experimental results should
go through this module. This makes changing the backend (between different databases) transparent to the benchmark code.
"""
import json
import os
import uuid
from abc import ABC, abstractmethod
from datetime import datetime
from tempfile import mkdtemp

import xarray as xr
from pathvalidate.argparse import validate_filename, validate_filepath

from bayesmark.path_util import join_safe_r, join_safe_w
from bayesmark.util import chomp, str_join_safe

NEWLINE = "\n"  # Just to be explicit, in case this ever gets run on Windows
PREFIX_FMT = "bo_%Y%m%d_%H%M%S_"  # The format we use for generating a new database name if none is specified

_XR_EXT = ".json"  # Extension we use for dumping xr.Dataset variables
_LOG_EXT = ".log"  # Extension to recommend for logging files
_DERIVED_DIR = "derived"  # The folder for derived variables (datasets)
_LOGGING_DIR = "log"  # The folder to recommend for logging
# Template for the manual setup instructions: filled in with the database folder and the space separated sub-folders
_SETUP_STR = """
User must ensure
%s
exists, and setup folder using
mkdir %s
User must ensure equal reps of each optimizer for unbiased results."""


class Serializer(ABC):
    """Interface every backend of the serialization abstraction layer has to implement.

    All operations are static methods that take the database location (`db_root`, `db`) explicitly.
    """

    @staticmethod
    @abstractmethod
    def init_db(db_root, keys, db=None, exist_ok=True):
        """Set up a "database" for storing data at the given location.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data.
        db : str
            The name of the database. If ``None``, a non-conflicting name will be generated.
        exist_ok : bool
            If true, do not raise an error if this database already exists.

        Returns
        -------
        db : str
            The name of the database.
        """

    @staticmethod
    @abstractmethod
    def get_keys(db_root, db):
        """Enumerate the non-derived keys present in the database.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for non-derived data.
        """

    @staticmethod
    @abstractmethod
    def get_derived_keys(db_root, db):
        """Enumerate the derived keys currently present in the database.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for derived data.
        """

    @staticmethod
    @abstractmethod
    def get_uuids(db_root, db, key):
        """Enumerate the UUIDs of all stored versions of a variable (non-derived key).

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for non-derived data.

        Returns
        -------
        uuids : list(uuid.UUID)
            The UUIDs for the versions of this key.
        """

    @staticmethod
    @abstractmethod
    def save(data, meta, db_root, db, key, uuid_):
        """Store experimental data; the concrete backend determines the supported type of `data`.
        """

    @staticmethod
    @abstractmethod
    def load(db_root, db, key, uuid_):
        """Retrieve experimental data; the concrete backend determines the type of the returned data.
        """

    @staticmethod
    @abstractmethod
    def save_derived(data, meta, db_root, db, key):
        """Store derived data; the concrete backend determines the supported type of `data`.
        """

    @staticmethod
    @abstractmethod
    def load_derived(db_root, db, key):
        """Retrieve derived data; the concrete backend determines the type of the returned data.
        """


class XRSerializer(Serializer):
    """Serialization layer when saving and loading `xarray` datasets (currently) as `json`.

    Every method is a static method, matching the declarations in :class:`Serializer`. They are normally called on
    the class itself (e.g., ``XRSerializer.save(...)``), but because they are declared with ``@staticmethod`` they
    behave the same when called on an instance.
    """

    @staticmethod
    def init_db(db_root, keys, db=None, exist_ok=True):  # pragma: io
        """Create the folder structure of a database under `db_root`.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data. One sub-folder is created
            per key, in addition to the folders for derived data and logging.
        db : str
            The name of the database. If ``None``, a new uniquely named folder prefixed with the current UTC time
            (formatted using `PREFIX_FMT`) is created.
        exist_ok : bool
            Passed on to :func:`os.makedirs` when creating the database folder (if `db` is given) and its sub-folders.

        Returns
        -------
        db : str
            The name of the database.
        """
        XRSerializer._validate(db_root, keys, db)

        if db is None:
            # mkdtemp guarantees a fresh folder, so the generated name cannot conflict with an existing database
            folder_prefix = datetime.utcnow().strftime(PREFIX_FMT)
            exp_subdir = mkdtemp(prefix=folder_prefix, dir=db_root)
            db = os.path.basename(exp_subdir)
            assert db.startswith(folder_prefix)
            assert os.path.join(db_root, db) == exp_subdir
        else:
            exp_subdir = os.path.join(db_root, db)
            os.makedirs(exp_subdir, exist_ok=exist_ok)

        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)
        for subd in subdirs:
            os.makedirs(os.path.join(exp_subdir, subd), exist_ok=exist_ok)

        return db

    @staticmethod
    def init_db_manual(db_root, keys, db):
        """Instruction for how one would manually initialize the "database" on another system.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data.
        db : str
            The name of the database.

        Returns
        -------
        manual_setup_info : str
            The setup instructions.
        """
        XRSerializer._validate(db_root, keys, db)
        assert db is not None, "Must specify db name to setup manually."

        exp_subdir = os.path.join(db_root, db)
        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)
        manual_setup_info = _SETUP_STR % (exp_subdir, str_join_safe(" ", subdirs))
        return manual_setup_info

    @staticmethod
    def get_keys(db_root, db):  # pragma: io
        """List the non-derived keys in the database: the sorted entries of the database folder, excluding the
        folders reserved for derived data and logging.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for non-derived data.
        """
        XRSerializer._validate(db_root, keys=(), db=db)

        keys = sorted(os.listdir(os.path.join(db_root, db)))
        # These two folders always exist in an initialized database and are not variables
        keys.remove(_DERIVED_DIR)
        keys.remove(_LOGGING_DIR)
        return keys

    @staticmethod
    def get_derived_keys(db_root, db):  # pragma: io
        """List the derived keys in the database, based on the (sorted) file names in the derived data folder.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for derived data.
        """
        XRSerializer._validate(db_root, keys=(), db=db)

        fnames = sorted(os.listdir(os.path.join(db_root, db, _DERIVED_DIR)))
        keys = [XRSerializer._fname_to_key(ff) for ff in fnames]
        return keys

    @staticmethod
    def get_uuids(db_root, db, key):  # pragma: io
        """List the UUIDs of the stored versions of a variable, based on the (sorted) file names in its folder.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for non-derived data.

        Returns
        -------
        uuids : list(uuid.UUID)
            The UUIDs for the versions of this key.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fnames = sorted(os.listdir(os.path.join(db_root, db, key)))
        uuids = [XRSerializer._fname_to_uuid(ff) for ff in fnames]
        return uuids

    @staticmethod
    def save(data, meta, db_root, db, key, uuid_):  # pragma: io
        """Save a dataset under a key name in the database.

        Parameters
        ----------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable we would like to store as non-derived data from an experiment.
        meta : json-serializable
            Associated meta-data with the experiment. This can be anything json serializable.
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        uuid_ : uuid.UUID
            The UUID to represent the version of this variable we are storing.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._uuid_to_fname(uuid_)
        path = (db_root, db, key, fname)
        with open(join_safe_w(*path), "w") as f:
            _dump_xr(f, ds=data, meta=meta)

    @staticmethod
    def load(db_root, db, key, uuid_):  # pragma: io
        """Load a dataset under a key name in the database. This is the inverse of :func:`.save`.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        uuid_ : uuid.UUID
            The UUID to represent the version of this variable we want to load.

        Returns
        -------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable for the non-derived data from an experiment.
        meta : json-serializable
            Associated meta-data with the experiment. This can be anything json serializable.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._uuid_to_fname(uuid_)
        path = (db_root, db, key, fname)
        with open(join_safe_r(*path), "r") as f:
            ds, meta = _load_xr(f)
        return ds, meta

    @staticmethod
    def save_derived(data, meta, db_root, db, key):  # pragma: io
        """Save a dataset under a key name in the database as derived data.

        Parameters
        ----------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable we would like to store as derived data from experiments.
        meta : json-serializable
            Associated meta-data with the experiments. This can be anything json serializable.
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._key_to_fname(key)
        path = (db_root, db, _DERIVED_DIR, fname)
        with open(join_safe_w(*path), "w") as f:
            _dump_xr(f, ds=data, meta=meta)

    @staticmethod
    def load_derived(db_root, db, key):  # pragma: io
        """Load a dataset under a key name in the database as derived data. This is the inverse of :func:`.save_derived`.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.

        Returns
        -------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable for the derived data from experiments.
        meta : json-serializable
            Associated meta-data with the experiments. This can be anything json serializable.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._key_to_fname(key)
        path = (db_root, db, _DERIVED_DIR, fname)
        with open(join_safe_r(*path), "r") as f:
            data, meta = _load_xr(f)
        return data, meta

    @staticmethod
    def logging_path(db_root, db, uuid_):  # pragma: io
        """Get an absolute path for logging from an experiment given its UUID.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        uuid_ : uuid.UUID
            The UUID to represent this experiment.

        Returns
        -------
        logfile : str
            Absolute path suitable for logging in this experiment.
        """
        XRSerializer._validate(db_root, keys=(), db=db)
        assert isinstance(uuid_, uuid.UUID)

        fname = uuid_.hex + _LOG_EXT
        logfile = join_safe_w(db_root, db, _LOGGING_DIR, fname)
        return logfile

    @staticmethod
    def _fname_to_uuid(fname):
        """Recover the UUID from the name of a data file. Inverse of `_uuid_to_fname`."""
        uuid_ = uuid.UUID(chomp(fname, _XR_EXT))
        return uuid_

    @staticmethod
    def _uuid_to_fname(uuid_):
        """Build the data file name (hex form of the UUID plus extension) for a version of a variable."""
        assert isinstance(uuid_, uuid.UUID)  # This can be eliminated once we use type hints

        fname = uuid_.hex + _XR_EXT
        return fname

    @staticmethod
    def _key_to_fname(key):
        """Build the file name (key plus extension) used to store a derived variable."""
        fname = key + _XR_EXT
        return fname

    @staticmethod
    def _fname_to_key(fname):
        """Recover the key from the file name of a derived variable. Inverse of `_key_to_fname`."""
        key = chomp(fname, _XR_EXT)
        return key

    @staticmethod
    def _validate(db_root, keys=(), db=None):
        """Validate that `db_root` is an absolute path and that `db` (if given) and all `keys` are valid file names.

        Errors are raised by the `pathvalidate` validators, or as an `AssertionError` if `db_root` is not absolute.
        """
        validate_filepath(db_root, platform="auto")
        assert os.path.isabs(db_root), "db_root must be absolute path"

        if db is not None:
            validate_filename(db, platform="universal")

        for kk in keys:
            validate_filename(kk, platform="universal")


def _dump_xr(f, ds, meta):  # pragma: io
    """Write `ds` and `meta` to the open file `f` as one JSON object, with ``"meta"`` on the first line and
    ``"data"`` on the second. Helper routine to `XRSerializer.save` and `XRSerializer.save_derived`.
    """
    assert isinstance(ds, xr.Dataset)  # Only Dataset is accepted for now, not DataArray

    # meta can be anything that json can handle
    meta_json = json.dumps(meta)
    # The meta-data must stay on a single line; json.dumps should escape newlines but verify to be sure
    assert NEWLINE not in meta_json

    # json.dump cannot be told to line break on the top-level only, so the outer object is assembled by hand
    opening = ('{"meta": %s,' % meta_json, NEWLINE, '"data": ')
    for piece in opening:
        f.write(piece)

    json.dump(ds.to_dict(), f)

    for piece in ("}", NEWLINE):
        f.write(piece)


def _load_xr(f):  # pragma: io
    """Read back a dataset and its meta-data from the open file `f`. Helper routine to `XRSerializer.load` and
    `XRSerializer.load_derived`.
    """
    payload = json.load(f)
    meta = payload["meta"]
    ds = xr.Dataset.from_dict(payload["data"])
    return ds, meta


================================================
FILE: bayesmark/signatures.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Routines to compute and compare the "signatures" of objective functions. These are useful to make sure two different
studies were actually optimizing the same objective function (even if they say the same test case in the meta-data).
"""
import warnings

import numpy as np
import pandas as pd

import bayesmark.random_search as rs

# How many points to probe the function to get the signature
N_SUGGESTIONS = 5


def get_func_signature(f, api_config):
    """Compute the signature of an objective function: its values at a fixed set of pseudo-random probe points.

    Parameters
    ----------
    f : typing.Callable
        The objective function we want to compute the signature of. This function must take inputs in the form of
        ``dict(str, object)`` with one dictionary key per variable, and provide `float` as the output.
    api_config : dict(str, dict)
        Configuration of the optimization variables. See API description.

    Returns
    -------
    signature_x : list(dict(str, object)) of shape (n_suggest,)
        The input locations probed on signature call.
    signature_y : list(float) of shape (n_suggest,)
        The objective function values at the inputs points. This is the real signature.
    """
    # A fixed seed makes every call probe the same points, which is what makes this a signature
    rng = np.random.RandomState(0)
    signature_x = rs.suggest_dict([], [], api_config, n_suggestions=N_SUGGESTIONS, random=rng)

    signature_y = []
    for probe in signature_x:
        outputs = f(probe)
        # Only the first output is part of the signature for now. This could be generalized later.
        signature_y.append(outputs[0])

    assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"
    return signature_x, signature_y


def analyze_signatures(signatures):
    """Check how well repeated signatures of each test case agree with each other.

    A `RuntimeWarning` is issued for every test case whose signatures are not all close to each other.

    Parameters
    ----------
    signatures : dict(str, list(list(float)))
        The signatures should all be the same length, so it should be 2D array
        like.

    Returns
    -------
    sig_errs : :class:`pandas:pandas.DataFrame`
        rows are test cases, columns are test points. An extra ``"max"`` row and column hold the margins.
    signatures_median : dict(str, list(float))
        Median signature across all repetition per test case.
    """
    spread_by_case = {}
    signatures_median = {}
    for test_case in signatures:
        signature_y = signatures[test_case]
        assert len(signature_y) > 0, "signature with no cases found"
        assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"

        # Range over the repetitions, separately for each probe point
        lowest = np.min(signature_y, axis=0)
        highest = np.max(signature_y, axis=0)

        diverged = not np.allclose(lowest, highest)
        if diverged:
            # It could be argued that the warning belongs in the caller, it is issued here for simplicity.
            message = "Signature diverged on %s betwen %s and %s" % (test_case, str(lowest), str(highest))
            warnings.warn(message, RuntimeWarning)

        spread_by_case[test_case] = highest - lowest
        # tolist makes sure the result can be serialized
        signatures_median[test_case] = np.median(signature_y, axis=0).tolist()

    # A pandas table makes it easy to append the max margins and is nicer to display.
    # Leaving the conversion to the user would remove the dependency on pandas.
    sig_errs = pd.DataFrame(spread_by_case).T
    sig_errs.loc["max", :] = sig_errs.max(axis=0)
    sig_errs.loc[:, "max"] = sig_errs.max(axis=1)

    return sig_errs, signatures_median


def analyze_signature_pair(signatures, signatures_ref):
    """Compare two sets of signatures (often from two sets of experiments) and report the error between them.

    Parameters
    ----------
    signatures : dict(str, list(float))
        Signatures from set of experiments, one signature per test case.
    signatures_ref : dict(str, list(float))
        The signatures from a reference set of experiments. The keys in `signatures` must be a subset of the keys
        in `signatures_ref`.

    Returns
    -------
    sig_errs : :class:`pandas:pandas.DataFrame`
        rows are test cases, columns are test points.
    signatures_median : dict(str, list(float))
        Median signature across the pair per test case.
    """
    # Stack each signature with its reference, the pair is then analyzed like two repetitions of one test case
    stacked = {}
    for test_case in signatures:
        stacked[test_case] = [signatures[test_case], signatures_ref[test_case]]
    return analyze_signatures(stacked)


================================================
FILE: bayesmark/sklearn_funcs.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Routines to build a standardized interface to make `sklearn` hyper-parameter tuning problems look like an objective
function.

This file mostly contains a dictionary collection of all sklearn test funcs.

The format of each element in `MODELS` is:
model_name: (model_class, fixed_param_dict, search_param_api_dict)
`model_name` is an arbitrary name to refer to a certain strategy.
At usage time, the optimizer instance is created using:
``model_class(**kwarg_dict)``
The kwarg dict is `fixed_param_dict` + `search_param_dict`. The
`search_param_dict` comes from an optimizer which is configured using the
`search_param_api_dict`. See the API description for information on setting up
the `search_param_api_dict`.
"""
import os.path
import pickle as pkl
import warnings
from abc import ABC, abstractmethod

import numpy as np
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import Lasso, LogisticRegression, Ridge
from sklearn.metrics import get_scorer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES, VISIBLE_TO_OPT
from bayesmark.data import METRICS_LOOKUP, ProblemType, get_problem_type, load_data
from bayesmark.path_util import absopen
from bayesmark.space import JointSpace
from bayesmark.util import str_join_safe

# Number of cross-validation folds passed as `cv` to `cross_val_score` in `SklearnModel.evaluate`.
# Using 3 would be faster, but 5 is the most realistic CV split (5-fold)
CV_SPLITS = 5

# Each `*_cfg` dict below is a search space description (the `search_param_api_dict` of the module docstring): it
# maps a hyper-parameter name to its variable type, the warping space, and the (lower, upper) range.
# We should add cat variables into some of these configurations but a lot of
# the wrappers for the BO methods really have trouble with cat types.

# kNN
knn_cfg = {
    "n_neighbors": {"type": "int", "space": "linear", "range": (1, 25)},
    "p": {"type": "int", "space": "linear", "range": (1, 4)},
}

# SVM
svm_cfg = {
    "C": {"type": "real", "space": "log", "range": (1.0, 1e3)},
    "gamma": {"type": "real", "space": "log", "range": (1e-4, 1e-3)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
}

# DT
dt_cfg = {
    "max_depth": {"type": "int", "space": "linear", "range": (1, 15)},
    "min_samples_split": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_weight_fraction_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "max_features": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_impurity_decrease": {"type": "real", "space": "linear", "range": (0.0, 0.5)},
}

# RF (same tunable parameters and ranges as DT; the number of trees is fixed in the MODELS_* tables)
rf_cfg = {
    "max_depth": {"type": "int", "space": "linear", "range": (1, 15)},
    "max_features": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_split": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_weight_fraction_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_impurity_decrease": {"type": "real", "space": "linear", "range": (0.0, 0.5)},
}

# MLP with ADAM
mlp_adam_cfg = {
    "hidden_layer_sizes": {"type": "int", "space": "linear", "range": (50, 200)},
    "alpha": {"type": "real", "space": "log", "range": (1e-5, 1e1)},
    "batch_size": {"type": "int", "space": "linear", "range": (10, 250)},
    "learning_rate_init": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "validation_fraction": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
    "beta_1": {"type": "real", "space": "logit", "range": (0.5, 0.99)},
    # Upper limit kept strictly below 1 so that the logit warp of the bound stays finite
    "beta_2": {"type": "real", "space": "logit", "range": (0.9, 1.0 - 1e-6)},
    "epsilon": {"type": "real", "space": "log", "range": (1e-9, 1e-6)},
}

# MLP with SGD
mlp_sgd_cfg = {
    "hidden_layer_sizes": {"type": "int", "space": "linear", "range": (50, 200)},
    "alpha": {"type": "real", "space": "log", "range": (1e-5, 1e1)},
    "batch_size": {"type": "int", "space": "linear", "range": (10, 250)},
    "learning_rate_init": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "power_t": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "momentum": {"type": "real", "space": "logit", "range": (0.001, 0.999)},
    "validation_fraction": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
}

# AdaBoostClassifier
ada_cfg = {
    "n_estimators": {"type": "int", "space": "linear", "range": (10, 100)},
    "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e1)},
}

# lasso (used for the L1-penalized logistic regression classifier)
lasso_cfg = {
    "C": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "intercept_scaling": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
}

# linear (used for the L2-penalized logistic regression classifier; currently identical to lasso_cfg)
linear_cfg = {
    "C": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "intercept_scaling": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
}

# Classification models. Each entry is: model_name -> (model_class, fixed_param_dict, search_param_api_dict).
# The fixed params are merged with the optimizer-suggested params when the model is instantiated.
MODELS_CLF = {
    "kNN": (KNeighborsClassifier, {}, knn_cfg),
    "SVM": (SVC, {"kernel": "rbf", "probability": True}, svm_cfg),
    "DT": (DecisionTreeClassifier, {"max_leaf_nodes": None}, dt_cfg),
    "RF": (RandomForestClassifier, {"n_estimators": 10, "max_leaf_nodes": None}, rf_cfg),
    "MLP-adam": (MLPClassifier, {"solver": "adam", "early_stopping": True}, mlp_adam_cfg),
    "MLP-sgd": (
        MLPClassifier,
        {"solver": "sgd", "early_stopping": True, "learning_rate": "invscaling", "nesterovs_momentum": True},
        mlp_sgd_cfg,
    ),
    "ada": (AdaBoostClassifier, {}, ada_cfg),
    # "lasso" and "linear" are both logistic regression; they differ only in the penalty (l1 vs l2)
    "lasso": (
        LogisticRegression,
        {"penalty": "l1", "fit_intercept": True, "solver": "liblinear", "multi_class": "ovr"},
        lasso_cfg,
    ),
    "linear": (
        LogisticRegression,
        {"penalty": "l2", "fit_intercept": True, "solver": "liblinear", "multi_class": "ovr"},
        linear_cfg,
    ),
}

# For now, we will assume the default is to go thru all classifiers
# (fails at import time if the table and MODEL_NAMES ever get out of sync)
assert sorted(MODELS_CLF.keys()) == sorted(MODEL_NAMES)

# Search spaces that are specific to the regression variants of the models.

# AdaBoostRegressor
ada_cfg_reg = {
    "n_estimators": {"type": "int", "space": "linear", "range": (10, 100)},
    "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e1)},
}

# Lasso regression
# NOTE(review): `normalize` is believed to be rejected by newer scikit-learn releases -- confirm against the pinned
# scikit-learn version before upgrading.
lasso_cfg_reg = {
    "alpha": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "fit_intercept": {"type": "bool"},
    "normalize": {"type": "bool"},
    "max_iter": {"type": "int", "space": "log", "range": (10, 5000)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "positive": {"type": "bool"},
}

# Ridge regression (note the lower `tol` limit is 1e-4 here, versus 1e-5 for lasso)
linear_cfg_reg = {
    "alpha": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "fit_intercept": {"type": "bool"},
    "normalize": {"type": "bool"},
    "max_iter": {"type": "int", "space": "log", "range": (10, 5000)},
    "tol": {"type": "real", "space": "log", "range": (1e-4, 1e-1)},
}

# Regression models. Each entry is: model_name -> (model_class, fixed_param_dict, search_param_api_dict).
# Models whose hyper-parameters match the classifier version re-use the classifier search space.
MODELS_REG = {
    "kNN": (KNeighborsRegressor, {}, knn_cfg),
    "SVM": (SVR, {"kernel": "rbf"}, svm_cfg),
    "DT": (DecisionTreeRegressor, {"max_leaf_nodes": None}, dt_cfg),
    "RF": (RandomForestRegressor, {"n_estimators": 10, "max_leaf_nodes": None}, rf_cfg),
    "MLP-adam": (MLPRegressor, {"solver": "adam", "early_stopping": True}, mlp_adam_cfg),
    "MLP-sgd": (
        MLPRegressor,  # regression crashes often with relu
        {
            "activation": "tanh",
            "solver": "sgd",
            "early_stopping": True,
            "learning_rate": "invscaling",
            "nesterovs_momentum": True,
        },
        mlp_sgd_cfg,
    ),
    "ada": (AdaBoostRegressor, {}, ada_cfg_reg),
    "lasso": (Lasso, {}, lasso_cfg_reg),
    "linear": (Ridge, {"solver": "auto"}, linear_cfg_reg),
}

# If both classifiers and regressors match MODEL_NAMES then the experiment
# launcher can simply go thru the cartesian product and do all combos.
assert sorted(MODELS_REG.keys()) == sorted(MODEL_NAMES)


class TestFunction(ABC):
    """Common interface for every objective function in the benchmark.

    Sub-classes are not required to be ML hyper-parameter tuning problems; they only need to implement `evaluate` and
    populate `api_config`.
    """

    def __init__(self):
        """Initialize the shared state of a benchmark test function.

        A test function is expected to know the meta-data describing its search space while remaining stateless with
        respect to evaluations (e.g., it keeps no counter of how many times it has been evaluated).
        """
        # Placeholder: sub-classes must fill this in before `get_api_config` is usable
        self.api_config = None

    @abstractmethod
    def evaluate(self, params):
        """Evaluate the function at the parameter setting `params`. Must be implemented by sub-classes.
        """

    def get_api_config(self):
        """Return the API config describing the search space of this test problem.

        Returns
        -------
        api_config : dict(str, dict(str, object))
            The API config for the used model. See README for API description.
        """
        config = self.api_config
        assert config is not None, "API config is not set."
        return config


class SklearnModel(TestFunction):
    """Test class for sklearn classifier/regressor CV score objective functions.
    """

    # Map our short names for metrics to the full length sklearn name
    _METRIC_MAP = {
        "nll": "neg_log_loss",
        "acc": "accuracy",
        "mae": "neg_mean_absolute_error",
        "mse": "neg_mean_squared_error",
    }

    # This can be static and constant for now. The order matches the tuple returned by `evaluate`.
    objective_names = (VISIBLE_TO_OPT, "generalization")

    def __init__(self, model, dataset, metric, shuffle_seed=0, data_root=None):
        """Build class that wraps sklearn classifier/regressor CV score for use as an objective function.

        Parameters
        ----------
        model : str
            Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is
            classification or regression.
        dataset : str
            Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
        metric : str
            Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict depending on if dataset is
            classification or regression.
        shuffle_seed : int
            Random seed to use when splitting the data into train and validation in the cross-validation splits. This
            is needed in order to keep the split constant across calls. Otherwise there would be extra noise in the
            objective function for varying splits.
        data_root : str
            Root directory to look for all custom csv files.
        """
        TestFunction.__init__(self)
        data, target, problem_type = load_data(dataset, data_root=data_root)
        assert problem_type in (ProblemType.clf, ProblemType.reg)
        self.is_classifier = problem_type == ProblemType.clf

        # Do some validation on loaded data.
        # np.float64 is spelled out because the `np.float_` alias no longer exists in NumPy 2.0.
        assert isinstance(data, np.ndarray)
        assert isinstance(target, np.ndarray)
        assert data.ndim == 2 and target.ndim == 1
        assert data.shape[0] == target.shape[0]
        assert data.size > 0
        assert data.dtype == np.float64
        assert np.all(np.isfinite(data))  # also catch nan
        assert target.dtype == (np.int_ if self.is_classifier else np.float64)
        assert np.all(np.isfinite(target))  # also catch nan

        model_lookup = MODELS_CLF if self.is_classifier else MODELS_REG
        base_model, fixed_params, api_config = model_lookup[model]

        # New members for model
        self.base_model = base_model
        self.fixed_params = fixed_params
        self.api_config = api_config

        # Always shuffle your data to be safe. Use fixed seed for reprod.
        # The 20% hold-out (data_Xt, data_yt) is only used for the generalization objective.
        self.data_X, self.data_Xt, self.data_y, self.data_yt = train_test_split(
            data, target, test_size=0.2, random_state=shuffle_seed, shuffle=True
        )

        assert metric in METRICS, "Unknown metric %s" % metric
        assert metric in METRICS_LOOKUP[problem_type], "Incompatible metric %s with problem type %s" % (
            metric,
            problem_type,
        )
        self.scorer = get_scorer(SklearnModel._METRIC_MAP[metric])

    def evaluate(self, params):
        """Evaluate the sklearn CV objective at a particular parameter setting.

        Parameters
        ----------
        params : dict(str, object)
            The varying (non-fixed) parameter dict to the sklearn model.

        Returns
        -------
        cv_loss : float
            Average loss over CV splits for sklearn model when tested using the settings in params.
        generalization_loss : float
            Loss on the held-out split for a model fit on the full training split with the same settings.
        """
        params = dict(params)  # copy to avoid modification of original
        params.update(self.fixed_params)  # add in fixed params

        # now build the skl object
        clf = self.base_model(**params)

        assert np.all(np.isfinite(self.data_X)), "all features must be finite"
        assert np.all(np.isfinite(self.data_y)), "all targets must be finite"

        # Do the x-val, ignore user warn since we expect BO to try weird stuff
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            S = cross_val_score(clf, self.data_X, self.data_y, scoring=self.scorer, cv=CV_SPLITS)
        # Take the mean score across all x-val splits
        cv_score = np.mean(S)

        # Now let's get the generalization error for same hypers
        clf = self.base_model(**params)
        clf.fit(self.data_X, self.data_y)
        generalization_score = self.scorer(clf, self.data_Xt, self.data_yt)

        # get_scorer makes everything a score not a loss, so we need to negate to get the loss back
        cv_loss = -cv_score
        assert np.isfinite(cv_loss), "loss not even finite"
        generalization_loss = -generalization_score
        assert np.isfinite(generalization_loss), "loss not even finite"

        # Unbox to basic float to keep it simple. `float()` is used rather than `.item()` so that this works
        # whether the scorer handed back a NumPy scalar or an already-builtin Python float.
        cv_loss = float(cv_loss)
        generalization_loss = float(generalization_loss)

        # Order matches `objective_names`: CV loss first, then the held-out generalization loss
        return cv_loss, generalization_loss

    @staticmethod
    def test_case_str(model, dataset, scorer):
        """Generate the combined test case string from model, dataset, and scorer combination."""
        test_case = str_join_safe(ARG_DELIM, (model, dataset, scorer))
        return test_case

    @staticmethod
    def inverse_test_case_str(test_case):
        """Inverse of `test_case_str`."""
        model, dataset, scorer = test_case.split(ARG_DELIM)
        # Round trip check: make sure the split really is the inverse of the join
        assert test_case == SklearnModel.test_case_str(model, dataset, scorer)
        return model, dataset, scorer


class SklearnSurrogate(TestFunction):
    """Objective function backed by a pre-trained surrogate of a sklearn classifier/regressor CV score.
    """

    # Static and constant for now; the surrogate must predict one value per name
    objective_names = (VISIBLE_TO_OPT, "generalization")

    def __init__(self, model, dataset, scorer, path):
        """Load the pickled surrogate model that stands in for a real sklearn CV objective.

        Parameters
        ----------
        model : str
            Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is
            classification or regression.
        dataset : str
            Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
        scorer : str
            Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict depending on if dataset is
            classification or regression.
        path : str or bytes
            Root directory to look for all pickle files. If `bytes` are given, they are un-pickled directly instead
            of reading a file.
        """
        TestFunction.__init__(self)

        # The search space is looked up from the model tables; it could also be stored in the pkl
        problem_type = get_problem_type(dataset)
        assert problem_type in (ProblemType.clf, ProblemType.reg)
        model_lookup = MODELS_CLF if problem_type == ProblemType.clf else MODELS_REG
        _, _, self.api_config = model_lookup[model]
        self.space = JointSpace(self.api_config)

        # File name of the pre-trained surrogate for this test case
        fname = SklearnModel.test_case_str(model, dataset, scorer) + ".pkl"

        if not isinstance(path, bytes):
            pkl_path = os.path.join(path, fname)  # pragma: io
            assert os.path.isfile(pkl_path), "Model file not found: %s" % pkl_path

            with absopen(pkl_path, "rb") as f:  # pragma: io
                self.model = pkl.load(f)  # pragma: io
        else:
            # Raw pickle bytes: this is for test-ability, we could use mock instead.
            self.model = pkl.loads(path)
        assert callable(getattr(self.model, "predict", None))

    def evaluate(self, params):
        """Predict the objectives at a particular parameter setting using the surrogate.

        Parameters
        ----------
        params : dict(str, object)
            The varying (non-fixed) parameter dict to the sklearn model.

        Returns
        -------
        losses : tuple(float)
            Surrogate prediction for each objective in `objective_names`, in the same order.
        """
        warped = self.space.warp([params])
        (pred,) = self.model.predict(warped)

        assert pred.shape == (len(self.objective_names),)
        assert pred.dtype.kind == "f"

        assert np.all(-np.inf < pred)  # Will catch nan too
        # Plain tuple of floats to stay consistent with SklearnModel typing
        return tuple(pred.tolist())


================================================
FILE: bayesmark/space.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Do the conversion of search spaces into a normalized cartesian space.
"""
import numpy as np
from scipy.interpolate import interp1d
from scipy.special import expit as logistic  # because nobody calls it expit
from scipy.special import logit

from bayesmark.np_util import clip_chk, snap_to

WARPED_DTYPE = np.float_
N_GRID_DEFAULT = 8

# I can't make up mind of unicode or str is better wrt to Py 2/3 compatibility
# ==> Just make a global constant and make sure it works either way.
# Note: if we switch to np.str_, we will also need to update doc-strings!
CAT_DTYPE = np.unicode_
CAT_KIND = "U"
CAT_NATIVE_DTYPE = str
# Check to make sure consistent
assert CAT_KIND == np.dtype(CAT_DTYPE).kind
_infered = type(CAT_DTYPE("").item())
assert CAT_NATIVE_DTYPE == _infered

# ============================================================================
# These could go into util
# ============================================================================


def unravel_index(dims):
    """Build the tuple of coordinate arrays that enumerates every element of an array with shape `dims`.

    This wraps :func:`numpy:numpy.unravel_index` to sidestep a corner case bug for ``dims=()``. The upstream fix was
    merged into the numpy master branch Oct 18, 2017, so this wrapper becomes unnecessary with later numpy releases.
    Outside of the corner case, ``unravel_index(X.shape)`` is the same as ``np.unravel_index(range(X.size), X.shape)``.

    Parameters
    ----------
    dims : tuple(int)
        The shape of the array to use for unraveling ``indices``.

    Returns
    -------
    unraveled_coords : tuple(:class:`numpy:numpy.ndarray`)
        Each array in the tuple has shape (n,) where ``n=np.prod(dims)``. An empty tuple is returned when `dims` is
        ``()`` or describes an array without any elements.

    References
    ----------
    unravel_index(0, ()) should return () (Trac #2120) #580
    https://github.com/numpy/numpy/issues/580
    Allow `unravel_index(0, ())` to return () #9884
    https://github.com/numpy/numpy/pull/9884
    """
    n_elements = np.prod(dims)
    # The corner case: zero-dimensional or empty shapes
    if dims == () or n_elements == 0:
        return ()

    return np.unravel_index(range(n_elements), dims)


def encode(X, labels, assume_sorted=False, dtype=bool, assume_valid=False):
    """Perform one hot encoding of categorical data in :class:`numpy:numpy.ndarray` variable `X` of any dimension.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (...)
        Categorical values of any standard type. Vectorized to work for any dimensional `X`.
    labels : :class:`numpy:numpy.ndarray` of shape (n,)
        Complete list of all possible labels. List is flattened if it is not already 1 dimensional.
    assume_sorted : bool
        If true, assume labels is already sorted and unique. This saves the computational cost of calling
        :func:`numpy:numpy.unique`.
    dtype : type
        Desired data of feature array. One-hot is most logically `bool`, but feature matrices are usually `float`.
    assume_valid : bool
        If true, assume all element of `X` are in the list `labels`. This saves the computational cost of verifying
        `X` are in `labels`. If true and a non-label `X` occurs this routine will silently give bogus result.

    Returns
    -------
    Y : :class:`numpy:numpy.ndarray` of shape (..., n)
        One-hot encoding of `X`. Extra dimension is appended at end for the one-hot vector. It has data type `dtype`.

    Raises
    ------
    ValueError
        If `labels` is not a non-empty 1-dimensional array after the optional call to :func:`numpy:numpy.unique`.
    AssertionError
        If ``assume_valid=False`` and some element of `X` is not found in `labels`.
    """
    X = np.asarray(X)
    labels = np.asarray(labels) if assume_sorted else np.unique(labels)
    check_array(labels, "labels", pre=True, ndim=1, min_size=1)

    # searchsorted gives len(labels) for any value sorting after the last label, which is not a valid index. Clip
    # it so that such values are reported by the validity check below (or give the documented bogus result when
    # assume_valid=True) rather than causing an IndexError. Indices of valid elements are unaffected.
    idx = np.minimum(np.searchsorted(labels, X), len(labels) - 1)
    # If x is not even in labels then this will fail. This is not ValueError
    # because the user explicitly asked for this using argument assume_valid.
    assert assume_valid or np.all(np.asarray(labels[idx]) == X)

    # This is using some pro np indexing technique to vectorize across all
    # possible input dimensions for X in the same code.
    Y = np.zeros(X.shape + (len(labels),), dtype=dtype)
    Y[unravel_index(X.shape) + (idx.ravel(),)] = True
    return Y


def decode(Y, labels, assume_sorted=False):
    """Map one-hot vectors back to their labels, i.e., the inverse of the one-hot encoder `encode`.

    Parameters
    ----------
    Y : :class:`numpy:numpy.ndarray` of shape (..., n)
        One-hot encoding of categorical data `X`. Extra dimension is appended at end for the one-hot vector. Maximum
        element is taken if there is more than one non-zero entry in one-hot vector.
    labels : :class:`numpy:numpy.ndarray` of shape (n,)
        Complete list of all possible labels. List is flattened if it is not already 1-dimensional.
    assume_sorted : bool
        If true, assume labels is already sorted and unique. This saves the computational cost of calling
        :func:`numpy:numpy.unique`.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of shape (...)
        Categorical values corresponding to one-hot encoded `Y`.
    """
    one_hot = np.asarray(Y)
    if assume_sorted:
        label_arr = np.asarray(labels)
    else:
        label_arr = np.unique(labels)
    check_array(label_arr, "labels", pre=True, ndim=1, min_size=1)
    # The trailing axis of the one-hot array must have one slot per label
    n_labels = len(label_arr)
    check_array(one_hot, "Y", pre=True, shape_endswith=(n_labels,))

    # Position of the largest entry along the one-hot axis picks the label
    return label_arr[np.argmax(one_hot, axis=-1)]


def _error(msg, pre=False):  # pragma: validator
    """Helper routine for :func:`.check_array`.

    This could probably be made cleaner by using raise to create the assert.
    """
    if pre:
        raise ValueError(msg)
    else:
        assert False, msg


def check_array(
    X,
    name,
    pre=False,
    ndim=None,
    shape=None,
    shape_endswith=(),
    min_size=0,
    dtype=None,
    kind=None,
    allow_infinity=True,
    allow_nan=True,
    unsorted=True,
    whitelist=None,
):  # pragma: validator
    """Like :func:`sklearn:sklearn.utils.check_array` but better.

    Validate the requested properties of the array `X`. Any property whose argument is left at its default is not
    checked. The checks run in a fixed order and each failure is reported via :func:`._error`.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray`
        `numpy` array we want to validate.
    name : str
        Human readable name of the variable, used to refer to it in error messages. Unlike a variable name, this may
        include spaces.
    pre : bool
        If true, the checks are treated as pre-conditions of a function and a failure raises a `ValueError`. If
        false, the checks are treated as post-conditions and a failure raises an assertion failure.
    ndim : int
        Expected value of ``X.ndim``.
    shape : tuple(int)
        Expected value of ``X.shape``.
    shape_endswith : tuple(int)
        Expected trailing entries of ``X.shape``. This is useful in broadcasting where extra dimensions might be
        added on.
    min_size : int
        Minimum value for ``X.size``
    dtype : dtype
        Expected value of ``X.dtype``.
    kind : str
        Expected value of ``X.dtype.kind``. This is `'f'` for `float`, `'i'` for `int`, and so on.
    allow_infinity : bool
        If false, the check fails when `X` contains inf or ``-inf``.
    allow_nan : bool
        If false, the check fails when `X` contains a ``NaN``.
    unsorted : bool
        If false, the check fails when `X` is not in sorted order. This is designed to even work with string arrays.
        Only applied to 1-dimensional `X`.
    whitelist : :class:`numpy:numpy.ndarray`
        Array containing allowed values for `X`. If an element of `X` is not found in `whitelist`, the check fails.
    """
    # --- Dimensionality and shape ---
    if ndim is not None and X.ndim != ndim:
        _error("Expected %d dimensions for %s, got %d" % (ndim, name, X.ndim), pre)

    if shape is not None and X.shape != shape:
        _error("Expected shape %s for %s, got %s" % (str(shape), name, str(X.shape)), pre)

    n_tail = len(shape_endswith)
    if n_tail > 0 and X.shape[-n_tail:] != shape_endswith:
        if n_tail == 1:
            msg = "Expected shape (..., %d) for %s, got %s" % (shape_endswith[0], name, str(X.shape))
        else:
            # Dropping the opening paren of the tuple string lets it continue after the "(..., " prefix
            msg = "Expected shape (..., %s for %s, got %s" % (str(shape_endswith)[1:], name, str(X.shape))
        _error(msg, pre)

    if min_size > 0 and X.size < min_size:
        _error("%s needs at least %d elements, it has %d" % (name, min_size, X.size), pre)

    # --- Data type ---
    if dtype is not None and X.dtype != np.dtype(dtype):
        _error("Expected dtype %s for %s, got %s" % (str(np.dtype(dtype)), name, str(X.dtype)), pre)

    if kind is not None and X.dtype.kind != kind:
        _error("Expected array with kind %s for %s, got %s" % (kind, name, str(X.dtype.kind)), pre)

    # --- Values ---
    if not allow_infinity and np.any(np.abs(X) == np.inf):
        _error("Infinity is not allowed in %s" % name, pre)

    if not allow_nan and np.any(np.isnan(X)):
        _error("NaN is not allowed in %s" % name, pre)

    if whitelist is not None:
        # Element-wise membership test that also copes with arrays of any dimension
        if not all(xx in whitelist for xx in np.nditer(X, ["zerosize_ok"])):
            _error("Expected all elements of %s to be in %s" % (name, str(whitelist)), pre)

    # Only do this check in 1D
    must_be_sorted = not unsorted
    if X.ndim == 1 and must_be_sorted and np.any(X[:-1] > X[1:]):
        _error("Expected sorted input for %s" % name, pre)


# ============================================================================
# Setup warping dictionaries
# ============================================================================


def identity(x):
    """Warping function for the linear space, which leaves the input untouched (effectively a no-op).

    Parameters
    ----------
    x : scalar
        Input variable in linear space. Can be any numeric type and is vectorizable.

    Returns
    -------
    y : scalar
        Same as input `x`.
    """
    return x


def bilog(x):
    """Bilog warping function: a sign-symmetric extension of log that also accepts negative numbers.

    For large positive `x`, ``bilog(x) ~= log(x)``, and for negative `x` it is ``-log(abs(x))``. The bias term of
    one inside the log keeps the function well behaved near 0 and gives ``bilog(0) = 0``.

    Parameters
    ----------
    x : scalar
        Input variable in linear space. Can be any numeric type and is vectorizable.

    Returns
    -------
    y : float
        The bilog of `x`.
    """
    # Log of the shifted magnitude, the sign is restored afterwards
    log_magnitude = np.log(1.0 + np.abs(x))
    return np.sign(x) * log_magnitude


def biexp(x):
    """Inverse of the :func:`.bilog` function: a sign-symmetric exponential with ``biexp(0) = 0``.

    Parameters
    ----------
    x : scalar
        Input variable, i.e., a value produced by :func:`.bilog`. Can be any numeric type and is vectorizable.

    Returns
    -------
    y : float
        The biexp of `x`.
    """
    # Undo the log on the magnitude and remove the bias term, the sign is restored afterwards
    magnitude = np.exp(np.abs(x)) - 1.0
    return np.sign(x) * magnitude


# Look-up tables from the name of a warping space to the function that maps into the warped space (WARP_DICT) and
# the function that maps back out of it (UNWARP_DICT). Both tables must have the same keys.
WARP_DICT = {"linear": identity, "log": np.log, "logit": logit, "bilog": bilog}
UNWARP_DICT = {"linear": identity, "log": np.exp, "logit": logistic, "bilog": biexp}

# ============================================================================
# Setup spaces class hierarchy
# ============================================================================


class Space(object):
    """Base class for all types of variables.

    A space converts between values of a variable in its original type (`warp` input) and a continuous warped
    representation with a trailing extra dimension (`unwarp` input).
    """

    def __init__(self, dtype, default_round, warp="linear", values=None, range_=None):
        """Generic constructor of `Space` class.

        Not intended to be called directly but instead by child classes. However, `Space` is not an abstract class and
        will not give an error when instantiated.

        Parameters
        ----------
        dtype : type
            Data type of the variable in the original (unwarped) space.
        default_round : callable
            Function applied to unwarped values in `unwarp` when the space is defined by `range_`. When `values` is
            given, rounding to the nearest allowed value is used instead.
        warp : str
            Name of the warping to apply, must be a key in `WARP_DICT`.
        values : None or list
            Allowed values of the variable. Exactly one of `values` and `range_` must be specified.
        range_ : None or pair
            The (lower, upper) limits of the variable. Exactly one of `values` and `range_` must be specified.
        """
        self.dtype = dtype
        assert warp in WARP_DICT, "invalid space %s, allowed spaces are: %s" % (str(warp), str(WARP_DICT.keys()))
        self.warp_f = WARP_DICT[warp]
        self.unwarp_f = UNWARP_DICT[warp]

        # Setup range and rounding if values is supplied
        assert (values is None) != (range_ is None)
        round_to_values = default_round
        if range_ is None:  # => value is not None
            # Debatable if unique should be done before or after cast. But I
            # think after is better, esp. when changing precisions.
            values = np.asarray(values, dtype=dtype)
            values = np.unique(values)  # values now 1D ndarray no matter what
            check_array(
                values,
                "unique values",
                pre=True,
                ndim=1,
                dtype=dtype,
                min_size=2,
                allow_infinity=False,
                allow_nan=False,
            )

            # Extrapolation might happen due to numerics in type conversions.
            # Bounds checking is still done in validate routines.
            round_to_values = interp1d(values, values, kind="nearest", fill_value="extrapolate")
            # values is sorted after np.unique, so the end points give the range
            range_ = (values[0], values[-1])
        # Save values and rounding
        # Values is either None or was validated inside if statement
        self.values = values
        self.round_to_values = round_to_values

        # Note that if dtype=None that is the default for asarray.
        range_ = np.asarray(range_, dtype=dtype)
        check_array(range_, "range", pre=True, shape=(2,), dtype=dtype, unsorted=False)
        # Save range info, with input validation and post validation
        self.lower, self.upper = range_

        # Convert to warped bounds too with lots of post validation
        # The added trailing axis makes each warped bound an array of shape (1,)
        self.lower_warped, self.upper_warped = self.warp_f(range_[..., None]).astype(WARPED_DTYPE, copy=False)
        check_array(
            self.lower_warped,
            "warped lower bound %s(%.1f)" % (warp, self.lower),
            ndim=1,
            pre=True,
            dtype=WARPED_DTYPE,
            allow_infinity=False,
            allow_nan=False,
        )
        # Should never happen if warpers are strictly monotonic:
        assert np.all(self.lower_warped <= self.upper_warped)

        # Make sure a bit bigger to keep away from lower due to numerics
        self.upper_warped = np.maximum(self.upper_warped, np.nextafter(self.lower_warped, np.inf))
        check_array(
            self.upper_warped,
            "warped upper bound %s(%.1f)" % (warp, self.upper),
            pre=True,
            shape=self.lower_warped.shape,
            dtype=WARPED_DTYPE,
            allow_infinity=False,
            allow_nan=False,
        )
        # Should never happen if warpers are strictly monotonic:
        assert np.all(self.lower_warped < self.upper_warped)

    def validate(self, X, pre=False):
        """Routine to validate inputs to warp.

        This routine does not perform any checking on the dimensionality of `X` and is fully vectorized. `X` is cast to
        the data type of the space. If the space is defined by a range, `X` is passed through `clip_chk` with the
        range limits; otherwise, every element of `X` must be one of the allowed values.
        """
        X = np.asarray(X, dtype=self.dtype)

        if self.values is None:
            X = clip_chk(X, self.lower, self.upper)
        else:
            check_array(X, "X", pre=pre, whitelist=self.values)

        return X

    def validate_warped(self, X, pre=False):
        """Routine to validate inputs to unwarp. This routine is vectorized, but `X` must have at least 1-dimension.

        `X` is cast to `WARPED_DTYPE`, its last dimension is checked against the number of warped dimensions, and it is
        passed through `clip_chk` with the warped bounds.
        """
        X = np.asarray(X, dtype=WARPED_DTYPE)
        check_array(X, "X", pre=pre, shape_endswith=(len(self.lower_warped),))

        X = clip_chk(X, self.lower_warped, self.upper_warped)
        return X

    def warp(self, X):
        """Warp inputs to a continuous space.

        Parameters
        ----------
        X : :class:`numpy:numpy.ndarray` of shape (...)
            Input variables to warp. This is vectorized to work in any dimension, but it must be convertible to the
            data type of the class, which is in `self.dtype`.

        Returns
        -------
        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
            Warped version of input space. By convention there is an extra dimension on warped array.
            Currently, ``m=1`` for all warpers. `X_w` will have a `float` type.
        """
        X = self.validate(X, pre=True)

        X_w = self.warp_f(X)
        X_w = X_w[..., None]  # Convention is that warped has extra dim

        X_w = self.validate_warped(X_w)  # Ensures WARPED_DTYPE
        check_array(X_w, "X", ndim=X.ndim + 1, dtype=WARPED_DTYPE)
        return X_w

    def unwarp(self, X_w):
        """Inverse of `warp` function.

        Parameters
        ----------
        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
            Warped version of input space. This is vectorized to work in any dimension. But, by convention, there is an
            extra dimension on the warped array. Currently, the last dimension ``m=1`` for all warpers. `X_w` must be of
            a `float` type.

        Returns
        -------
        X : :class:`numpy:numpy.ndarray` of shape (...)
            Unwarped version of `X_w`. `X` will have the same data type as the class, which is in `self.dtype`.
        """
        X_w = self.validate_warped(X_w, pre=True)

        # Drop the extra warped dimension, unwarp, and keep the result inside the original range
        X = clip_chk(self.unwarp_f(X_w[..., 0]), self.lower, self.upper)
        # Either the default rounding of the space or snapping to the nearest allowed value
        X = self.round_to_values(X)

        X = self.validate(X)  # Ensures dtype
        check_array(X, "X", ndim=X_w.ndim - 1, dtype=self.dtype)
        return X

    def get_bounds(self):
        """Get bounds of the warped space.

        Returns
        -------
        bounds : :class:`numpy:numpy.ndarray` of shape (D, 2)
            Bounds in the warped space. First column is the lower bound and the second column is the upper bound.
            Calling ``bounds.tolist()`` gives the bounds in the standard form expected by `scipy` optimizers:
            ``[(lower_1, upper_1), ..., (lower_n, upper_n)]``.
        """
        bounds = np.stack((self.lower_warped, self.upper_warped), axis=1)
        check_array(bounds, "bounds", shape=(len(self.lower_warped), 2), dtype=WARPED_DTYPE)
        return bounds

    def grid(self, max_interp=N_GRID_DEFAULT):
        """Return grid spanning the original (unwarped) space.

        Parameters
        ----------
        max_interp : int
            The number of points to use in grid space when a range and not values are used to define the space.
            Must be ``>= 0``.

        Returns
        -------
        values : list
            Grid spanning the original space. This is simply `self.values` if a grid has already been specified,
            otherwise it is just grid across the range.
        """
        values = self.values
        if values is None:
            # Evenly spaced in the warped space, so the grid follows the warping after unwarping
            vw = np.linspace(self.lower_warped, self.upper_warped, max_interp)
            # Some spaces like int may result in duplicates after unwarping
            # so we apply unique to avoid this. However this will usually be
            # wasted computation.
            values = np.unique(self.unwarp(vw[:, None]))
            check_array(values, "values", ndim=1, dtype=self.dtype)

        # Best to convert to list to make sure in native type
        values = values.tolist()
        return values


class Real(Space):
    """Space for transforming real variables to normalized space (after warping).
    """

    def __init__(self, warp="linear", values=None, range_=None):
        """Build Real space class.

        Parameters
        ----------
        warp : {'linear', 'log', 'logit', 'bilog'}
            Which warping type to apply to the space. The warping is applied in the original space. That is, in a space
            with ``warp='log'`` and ``range_=(2.0, 10.0)``, the value 2.0 warps to ``log(2)``, not ``-inf`` as in some
            other frameworks.
        values : None or list(float)
            Possible values for space to take. Values must be of `float` type.
        range_ : None or :class:`numpy:numpy.ndarray` of shape (2,)
            Array with (lower, upper) pair with limits of space. Note that one must specify `values` or `range_`, but
            not both. `range_` must be composed of `float`.
        """
        assert warp is not None, "warp/space not specified for real"
        # Use np.float64 rather than the np.float_ alias: the alias was removed in NumPy 2.0, while np.float64 is
        # the very same type on older NumPy releases, so behavior is unchanged there.
        Space.__init__(self, np.float64, identity, warp, values, range_)


class Integer(Space):
    """Space that maps integer variables to a continuous normalized space."""

    def __init__(self, warp="linear", values=None, range_=None):
        """Set up the integer space.

        Parameters
        ----------
        warp : {'linear', 'log', 'bilog'}
            Warping to apply to the space. The warping acts on the original space: with ``warp='log'`` and
            ``range_=(2, 10)``, the value 2 warps to ``log(2)``, not ``-inf`` as in some other frameworks. No
            integer setting is compatible with the logit warp.
        values : None or list(int)
            Possible values for the space to take. Values must be of `int` type.
        range_ : None or :class:`numpy:numpy.ndarray` of shape (2,)
            Array with the (lower, upper) limits of the space. Exactly one of `values` and `range_` must be given.
            `range_` must be composed of `int`.
        """
        assert warp is not None, "warp/space not specified for int"

        # Integers are stored as np.int_ and recovered from the continuous space by rounding
        int_dtype = np.int_
        rounder = np.round
        Space.__init__(self, int_dtype, rounder, warp, values, range_)


class Boolean(Space):
    """Space that maps Boolean variables to a continuous normalized space."""

    def __init__(self, warp=None, values=None, range_=None):
        """Set up the Boolean space.

        Parameters
        ----------
        warp : None
            Must be omitted or None; accepted only for consistency with the other space types.
        values : None
            Must be omitted or None; accepted only for consistency with the other space types.
        range_ : None
            Must be omitted or None; accepted only for consistency with the other space types.
        """
        assert warp is None, "cannot warp bool"
        assert values is None and range_ is None, "cannot pass in values or range for bool"

        bool_type = np.bool_

        # Original space: exactly the two Boolean values
        self.dtype = bool_type
        self.values = np.array([False, True], dtype=bool_type)
        self.lower = bool_type(False)
        self.upper = bool_type(True)

        # No actual warping is applied; values are recovered from the continuous space by rounding
        self.warp_f = identity
        self.unwarp_f = identity
        self.round_to_values = np.round

        # Warped space is a single dimension on [0, 1]
        self.lower_warped = np.array([0.0], dtype=WARPED_DTYPE)
        self.upper_warped = np.array([1.0], dtype=WARPED_DTYPE)


class Categorical(Space):
    """Space that maps categorical variables to a continuous normalized space."""

    def __init__(self, warp=None, values=None, range_=None):
        """Set up the categorical space.

        Parameters
        ----------
        warp : None
            Must be omitted or None; accepted only for consistency with the other space types.
        values : list(str)
            Possible values for the space to take. Values must be unicode strings. Requiring type unicode (``'U'``)
            rather than strings (``'S'``) corresponds to the native string type.
        range_ : None
            Must be omitted or None; accepted only for consistency with the other space types.
        """
        assert warp is None, "cannot warp cat"
        assert values is not None, "must pass in explicit values for cat"
        assert range_ is None, "cannot pass in range for cat"

        # np.unique always hands back a sorted, de-duplicated 1D ndarray regardless of the input container
        categories = np.unique(values)
        check_array(categories, "values", pre=True, ndim=1, kind=CAT_KIND, min_size=2)
        n_categories = len(categories)

        self.values = categories
        self.dtype = CAT_DTYPE

        # Encoding happens in the warp; decoding is done in the rounding step (it is debatable whether decoding
        # belongs in unwarp_f or in round_to_values).
        self.warp_f = self._encode
        self.unwarp_f = identity
        self.round_to_values = self._decode

        # Limits in the original space are not needed for categories
        self.lower = None
        self.upper = None

        # One warped dimension per category, each on [0, 1]
        self.lower_warped = np.zeros(n_categories, dtype=WARPED_DTYPE)
        self.upper_warped = np.ones(n_categories, dtype=WARPED_DTYPE)

    def _encode(self, x):
        # Delegate to the module-level `encode` helper using this space's category labels
        return encode(x, self.values, True, WARPED_DTYPE, True)

    def _decode(self, x):
        # Delegate to the module-level `decode` helper using this space's category labels
        return decode(x, self.values, True)

    def warp(self, X):
        """Warp inputs to a continuous space.

        Parameters
        ----------
        X : :class:`numpy:numpy.ndarray` of shape (...)
            Input variables to warp. Vectorized over any number of dimensions, but the array must have the same
            type code as the class, which is unicode (``'U'``) for the :class:`.Categorical` space.

        Returns
        -------
        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
            Warped version of the input. By convention the warped array has one extra trailing dimension. The warped
            space is a one-hot encoding, so `m` is the number of possible values in the space. `X_w` has a `float`
            type.
        """
        X = self.validate(X, pre=True)

        encoded = self.warp_f(X)

        # Validating the output as well is probably overkill, but it is cheap insurance
        encoded = self.validate_warped(encoded)
        check_array(encoded, "X", ndim=X.ndim + 1, dtype=WARPED_DTYPE)
        return encoded

    def unwarp(self, X_w):
        """Inverse of `warp` function.

        Parameters
        ----------
        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
            Warped version of the input space. The warped space is a one-hot encoding, so `m` is the number of
            possible values in the space. `X_w` has a `float` type. Values other than zero/one are allowed in `X_w`;
            the maximal element in the vector is taken as the encoded value.

        Returns
        -------
        X : :class:`numpy:numpy.ndarray` of shape (...)
            Unwarped version of `X_w`. `X` has the same type code as the :class:`.Categorical` class, which is
            unicode (``'U'``).
        """
        X_w = self.validate_warped(X_w, pre=True)

        unwarped = self.unwarp_f(X_w)
        decoded = self.round_to_values(unwarped)

        decoded = self.validate(decoded)
        check_array(decoded, "X", ndim=X_w.ndim - 1, kind=CAT_KIND)
        return decoded


# Lookup from the ``type`` string of a variable config to the space class that handles it.
# Ordinal variables are treated identically to categorical ones for now.
SPACE_DICT = {
    "real": Real,
    "int": Integer,
    "bool": Boolean,
    "cat": Categorical,
    "ordinal": Categorical,
}

# ============================================================================
# Setup code for joint spaces over multiple parameters with different configs
# ============================================================================


class JointSpace(object):
    """Combination of multiple :class:`.Space` objects to transform multiple variables at the same time (jointly).
    """

    def __init__(self, meta):
        """Build JointSpace class.

        Parameters
        ----------
        meta : dict(str, dict)
            Configuration of variables in joint space. See API description.
        """
        assert len(meta) > 0  # Unclear what to do with empty space

        # Lock in an order if not ordered dict, sorted helps reproducibility
        self.param_list = sorted(meta.keys())

        # Might as well pre-validate a bit here
        for param, config in meta.items():
            assert config["type"] in SPACE_DICT, "invalid input type %s" % config["type"]

        # One space object per variable; missing config entries are passed through as None
        spaces = {
            param: SPACE_DICT[config["type"]](
                config.get("space", None), config.get("values", None), config.get("range", None)
            )
            for param, config in meta.items()
        }
        self.spaces = spaces

        # Cumulative count of warped dimensions per variable (in `param_list` order). The last element is the total
        # size of the joint warped space; the others are the split points between variables.
        self.blocks = np.cumsum([len(spaces[param].get_bounds()) for param in self.param_list])

    def validate(self, X):
        """Raise `ValueError` if X does not match the format expected for a
        joint space."""
        for record in X:
            record_keys = sorted(record.keys())
            if self.param_list != record_keys:
                # Interpolate with ``%`` so the exception carries one readable message. Passing the tuple as a
                # second argument to ValueError would leave the ``%s`` placeholders unfilled.
                raise ValueError("Expected joint space keys %s, but got %s" % (self.param_list, record_keys))
            for param in self.param_list:
                self.spaces[param].validate([record[param]], pre=True)
        # Return X back so we have option to cast it to list or whatever later
        return X

    def warp(self, X):
        """Warp inputs to a continuous space.

        Parameters
        ----------
        X : list(dict(str, object)) of shape (n,)
            List of `n` points in the joint space to warp. Each list element is a dictionary where each key corresponds
            to a variable in the joint space. Keys can be missing in the records and the corresponding warped
            variables will be ``nan``.

        Returns
        -------
        X_w : :class:`numpy:numpy.ndarray` of shape (n, m)
            Warped version of input space. Result is 2D `float` np array. `n` is the number of input points, length
            of `X`. `m` is the size of the joint warped space, which can be inferred by calling :func:`.get_bounds`.
        """
        # It would be nice to have cleaner way to deal with this corner case
        if len(X) == 0:
            return np.zeros((0, self.blocks[-1]), dtype=WARPED_DTYPE)

        # For each record, warp every variable (in `param_list` order) and concatenate the pieces into one row.
        # Variables missing from a record are filled with nan across all of their warped dimensions.
        X_w = [
            np.concatenate(
                [
                    self.spaces[param].warp(record[param])
                    if param in record
                    else np.full(len(self.spaces[param].get_bounds()), np.nan)
                    for param in self.param_list
                ]
            )
            for record in X
        ]
        X_w = np.stack(X_w, axis=0)
        check_array(X_w, "X", shape=(len(X), self.blocks[-1]), dtype=WARPED_DTYPE)
        return X_w

    def unwarp(self, X_w, fixed_vals={}):
        """Inverse of :func:`.warp`.

        Parameters
        ----------
        X_w : :class:`numpy:numpy.ndarray` of shape (n, m)
            Warped version of input space. Must be 2D `float` :class:`numpy:numpy.ndarray`. `n` is the number of
            separate points in the warped joint space. `m` is the size of the joint warped space, which can be inferred
            in advance by calling :func:`.get_bounds`.
        fixed_vals : dict
            Subset of variables we want to keep fixed in X. Unwarp checks that the unwarped version of `X_w` matches
            `fixed_vals` up to numerical error. Otherwise, an error is raised. The dictionary is only read, never
            modified, so the shared default is safe.

        Returns
        -------
        X : list(dict(str, object)) of shape (n,)
            List of `n` points in the joint space to warp. Each list element is a dictionary where each key corresponds
            to a variable in the joint space.
        """
        X_w = np.asarray(X_w)
        check_array(X_w, "X", ndim=2, shape_endswith=(self.blocks[-1],), dtype=WARPED_DTYPE)
        N = X_w.shape[0]

        # Split the columns back into one chunk per variable, then unwarp each chunk with its own space.
        # Use snap_to to make sure we get exact value (no-round off) for cases where we know expected answer
        X = {
            param: snap_to(self.spaces[param].unwarp(xx), fixed_vals.get(param, None))
            for param, xx in zip(self.param_list, np.hsplit(X_w, self.blocks[:-1]))
        }
        # Convert dict of arrays to list of dicts, this would not be needed if
        # we used pandas but we do not want to add it as a dep. ``.item()``
        # converts each element to the corresponding native Python scalar.
        X = [{param: X[param][ii].item() for param in self.param_list} for ii in range(N)]
        return X

    def get_bounds(self):
        """Get bounds of the warped joint space.

        Returns
        -------
        bounds : :class:`numpy:numpy.ndarray` of shape (m, 2)
            Bounds in the warped space. First column is the lower bound and the second column is the upper bound.
            ``bounds.tolist()`` gives the bounds in the standard form expected by scipy optimizers:
            ``[(lower_1, upper_1), ..., (lower_n, upper_n)]``.
        """
        # Stack the per-variable bounds in `param_list` order, matching the column layout used by `warp`
        bounds = np.concatenate([self.spaces[param].get_bounds() for param in self.param_list], axis=0)
        check_array(bounds, "bounds", shape_endswith=(2,), dtype=WARPED_DTYPE)
        return bounds

    def grid(self, max_interp=N_GRID_DEFAULT):
        """Return grid spanning the original (unwarped) space.

        Parameters
        ----------
        max_interp : int
            The number of points to use in grid space when a range and not values are used to define the space.
            Must be ``>= 0``.

        Returns
        -------
        axes : dict(str, list)
            Grids spanning the original spaces of each variable. For each variable, this is simply ``self.values``
            if a grid has already been specified, otherwise it is just grid across the range.
        """
        axes = {var_name: space.grid(max_interp=max_interp) for var_name, space in self.spaces.items()}
        return axes


================================================
FILE: bayesmark/stats.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""General statistic tools useful in the benchmark.
"""
import numpy as np
import scipy.stats as sst


def robust_standardize(X, q_level=0.5):
    """Robustly standardize the data matrix `X` along axis 0.

    Works like :func:`sklearn:sklearn.preprocessing.robust_scale`, with an extra Gaussian adjustment of the scale:
    if Gaussian data is passed in, the transformed data is, for large `n`, distributed as N(0,1). See sklearn
    feature request #10139 on github.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Array with the elements to standardize. Must be 1D or 2D, contain only finite values, and have ``n >= 2``.
    q_level : scalar
        Must be in (0, 1]. Probability mass of the central inter-quantile range used to estimate the scale, e.g.,
        ``0.5`` gives the inter-quartile range.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Standardized version of the input `X`.
    """
    X = np.asarray(X)
    assert X.ndim in (1, 2)
    assert np.all(np.isfinite(X))
    assert 0.0 < q_level <= 1.0
    assert X.shape[0] >= 2

    # Location estimate: the median
    center = np.median(X, axis=0)

    # Scale estimate: width of the central inter-quantile range
    q_lo = 0.5 * (1.0 - q_level)
    q_hi = 0.5 * (1.0 + q_level)
    upper = np.percentile(X, 100 * q_hi, axis=0)
    lower = np.percentile(X, 100 * q_lo, axis=0)
    spread = np.asarray(upper - lower)
    # A zero spread would divide by zero; leave such columns unscaled instead
    spread[spread == 0.0] = 1.0

    # Width of the same inter-quantile range under N(0,1), used to rescale to match the scale of N(0,1)
    gauss_width = sst.norm.ppf(q_hi) - sst.norm.ppf(q_lo)

    X_ss = (X - center) / spread
    X_ss = X_ss * gauss_width
    assert X.shape == X_ss.shape
    return X_ss


def t_EB(x, alpha=0.05, axis=-1):
    """Compute t-statistic based error bars on the mean of `x`.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n_samples,)
        Data points used to estimate the mean. Must have at least one dimension and must not contain ``NaN``.
    alpha : float
        The alpha level (``1-confidence``) probability (in (0, 1)) used to build the confidence interval from the
        t-statistic.
    axis : int
        Axis of `x` along which the t-statistics are computed. The function is vectorized over all other dimensions.

    Returns
    -------
    EB : float
        Size of the error bar on the mean (``>= 0``). The confidence interval is ``[mean(x) - EB, mean(x) + EB]``.
        `EB` is ``inf`` when ``len(x) <= 1``. Will be ``NaN`` if there are any infinite values in `x`.
    """
    assert np.ndim(x) >= 1
    assert not np.any(np.isnan(x))
    assert np.ndim(alpha) == 0
    assert 0.0 < alpha < 1.0

    n_samples = np.shape(x)[axis]
    if n_samples <= 1:
        # Not enough points to estimate a spread: infinite error bar, shaped like a reduction over `axis`
        reduced_shape = np.sum(x, axis=axis).shape
        return np.full(reduced_shape, fill_value=np.inf)

    # Interval of a standard t distribution; the location cancels out since only the width is needed
    lower, upper = sst.t.interval(1 - alpha, n_samples - 1, loc=0.0, scale=1.0)
    assert not (lower > upper)

    # Multiplying by the standard error afterwards (rather than passing it as scale) behaves better when it is 0
    EB = 0.5 * sst.sem(x, axis=axis) * (upper - lower)
    return EB


================================================
FILE: bayesmark/util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""General utilities that should arguably be included in Python.
"""
import shlex


def in_or_none(x, L):
    """Check if item `x` is in list `L`, or if the list is None (in which case anything is accepted)."""
    if L is None:
        return True
    return x in L


def all_unique(L):
    """Test whether a list is free of repeated elements.

    Parameters
    ----------
    L : list
        List to check for uniqueness. Its elements must be hashable.

    Returns
    -------
    uniq : bool
        True if no element of `L` appears more than once.
    """
    n_total = len(L)
    n_distinct = len(set(L))
    return n_total == n_distinct


def strict_sorted(L):
    """Sort `L`, insisting that the result is strictly increasing. An error is raised if `L` contains duplicates.

    Parameters
    ----------
    L : list
        List to sort.

    Returns
    -------
    S : list
        Strictly sorted version of `L`.
    """
    # Duplicates would make the order non-strict, so reject them up front
    assert all_unique(L), "Cannot strict sort because list contains duplicates."
    return sorted(L)


def range_str(stop):
    """Version of ``range(stop)`` that also provides zero padded string forms, all of the same length.

    Parameters
    ----------
    stop : int
        Stop value equivalent to ``range(stop)``.

    Yields
    ------
    x : int
        The integer from ``range(stop)``.
    ss : str
        String representation of `x`, zero padded so that all strings from this iterator have the same ``len(ss)``.
    """
    # Width of the largest value that will be produced; moot if stop=0 since nothing is produced
    width = len(str(stop - 1))
    return map(lambda ii: (ii, str(ii).zfill(width)), range(stop))


def str_join_safe(delim, str_vec, append=False):
    """Variant of `str.join` whose result can always be split back into its parts.

    Parameters
    ----------
    delim : str
        Delimiter to join the strings.
    str_vec : list(str)
        List of strings to join. A `ValueError` is raised if `delim` is present in any of these strings.
    append : bool
        If true, the first element is assumed to be an already joined string that is being appended to, so
        `str_vec[0]` is allowed to contain `delim`.

    Returns
    -------
    joined_str : str
        Joined version of `str_vec`, which is always recoverable with ``joined_str.split(delim)``.

    Examples
    --------
    Append is required because,

    .. code-block:: pycon

        ss = str_join_safe('_', ('foo', 'bar'))
        str_join_safe('_', (ss, 'baz', 'qux'))

    would fail because we are appending ``'baz'`` and ``'qux'`` to the already joined string ``ss = 'foo_bar'``.

    In this case, we use

    .. code-block:: pycon

        ss = str_join_safe('_', ('foo', 'bar'))
        str_join_safe('_', (ss, 'baz', 'qux'), append=True)
    """
    # When appending, the first element is exempt from the delimiter check
    candidates = str_vec[1:] if append else str_vec

    # First string that would make the join ambiguous, if any
    offender = next((ss for ss in candidates if delim in ss), None)
    if offender is not None:
        raise ValueError("%s cannot contain delimeter %s" % (offender, delim))

    return delim.join(str_vec)


def shell_join(argv, delim=" "):
    """Combine arguments into a command line string, acting as the inverse of `shlex` parsing into `argv`.

    Basically, if the resulting string is passed as a command line argument then `sys.argv` will equal `argv`.

    Parameters
    ----------
    argv : list(str)
        List of arguments to collect into a command line string. Each one is escaped as needed.
    delim : str
        Whitespace delimiter to join the strings.

    Returns
    -------
    cmd : str
        Properly escaped and joined command line string.
    """
    quoted = map(shlex.quote, argv)
    cmd = delim.join(quoted)
    # Sanity check: parsing the command line must give back the original arguments
    assert shlex.split(cmd) == list(argv)
    return cmd


def chomp(str_val, ext="\n"):
    """Remove a known suffix from the end of a string.

    Parameters
    ----------
    str_val : str
        String to strip the suffix from, e.g., ``"foo.log"`` when we want to drop the file extension.
    ext : str
        The suffix to remove. Must be non-empty. An error is raised if `str_val` doesn't end in `ext`.

    Returns
    -------
    chomped : str
        Version of `str_val` with `ext` removed from the end.
    """
    n_ext = len(ext)
    assert n_ext > 0

    # Split into everything before the last n_ext characters and those last characters themselves
    head = str_val[:-n_ext]
    tail = str_val[-n_ext:]
    assert ext == tail, "%s must end with %s" % (repr(str_val), repr(ext))
    return head


def preimage_func(f, x):
    """Compute the pre-image of a function over a set of input points.

    Parameters
    ----------
    f : typing.Callable
        The function to pre-image. Its output type must be hashable.
    x : typing.Iterable
        Input points at which to evaluate `f`. `x` must be of a type acceptable by `f`.

    Returns
    -------
    D : dict(object, list(object))
        Dictionary mapping each output of `f` to the list of `x` values that produce it, in the order encountered.
    """
    D = {}
    for point in x:
        key = f(point)
        if key not in D:
            D[key] = []
        D[key].append(point)
    return D


================================================
FILE: bayesmark/xr_util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""General utilities for `xarray` that should be included in `xarray`.
"""
from collections import OrderedDict

import numpy as np
import pandas as pd
import xarray as xr

from bayesmark.util import all_unique


def is_simple_coords(coords, min_side=0, dims=None):
    """Test whether xr coordinates are all "simple", i.e., equal to ``np.arange(n)``.

    Parameters
    ----------
    coords : dict-like of coordinates
        The coordinates to check, e.g. from ``DataArray.coords``.
    min_side : int
        Minimum length required of each checked coordinate. With ``min_side=1``, empty coordinates result in a
        return value of ``False``.
    dims : None or list of dimension names
        Dimensions to check for simplicity. If ``None``, all dimensions are checked.

    Returns
    -------
    simple : bool
        True when all checked coordinates are simple.
    """
    for name in coords:
        if dims is not None and name not in dims:
            # Not one of the dimensions we were asked about
            continue

        arr = coords[name].values
        # The dtype is not checked on empty coords; that could be added to be stricter
        if len(arr) > 0 and arr.dtype != np.int_:
            return False

        as_list = arr.tolist()
        n_side = len(as_list)
        if n_side < min_side or as_list != list(range(n_side)):
            return False
    return True


def ds_like(ref, vars_, dims, fill=np.nan):
    """Create a blank :class:`xarray:xarray.Dataset` that borrows some coordinates from another
    :class:`xarray:xarray.Dataset`.

    Parameters
    ----------
    ref : :class:`xarray:xarray.Dataset`
        The reference dataset to copy coordinates from.
    vars_ : typing.Iterable
        List of variable names to create in the new dataset.
    dims : list
        List of dimensions to copy over from `ref`. These are the dimensions of the output.
    fill : scalar
        Scalar value used to fill the blank dataset. The `dtype` is determined from the `fill` value.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.
        All values are filled with `fill`.
    """
    shape = [ref.sizes[dd] for dd in dims]

    # Use OrderedDict for good measure, probably not needed. Every variable gets its own freshly filled array.
    data = OrderedDict()
    for name in vars_:
        data[name] = (dims, np.full(shape, fill))

    coords = OrderedDict((dd, ref.coords[dd].values) for dd in dims)
    return xr.Dataset(data, coords=coords)


def ds_like_mixed(ref, vars_, dims, fill=np.nan):
    """The same as `ds_like` but allow different dimensions for each variable.

    Parameters
    ----------
    ref : :class:`xarray:xarray.Dataset`
        The reference dataset we want to copy coordinates from.
    vars_ : typing.Iterable
        List of (variable names, dimension) pairs we want in the new dataset. The dimensions for each variable must be
        a subset of `dims`.
    dims : list
        List of all dimensions we want to copy over from `ref`.
    fill : scalar
        Scalar value to fill the blank dataset. The `dtype` will be determined from the `fill` value.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.
        All values are filled with `fill`.
    """
    coords = OrderedDict([(dd, ref.coords[dd].values) for dd in dims])

    data = OrderedDict()
    for var_name, var_dims in vars_:
        assert set(var_dims).issubset(dims)
        # Each variable is sized by its own subset of the dimensions
        size = [ref.sizes[dd] for dd in var_dims]
        # Honor the caller's `fill` value rather than always filling with NaN
        data[var_name] = (var_dims, np.full(size, fill))
    ds = xr.Dataset(data, coords=coords)
    return ds


def only_dataarray(ds):
    """Extract the single :class:`xarray:xarray.DataArray` held by a :class:`xarray:xarray.Dataset`. If the
    :class:`xarray:xarray.Dataset` does not have exactly one variable, an error is raised.

    Parameters
    ----------
    ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` to convert to a :class:`xarray:xarray.DataArray`. This must contain only one
        variable.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        The :class:`xarray:xarray.DataArray` extracted from `ds`.
    """
    # Unpacking into a one-element target raises ValueError unless there is exactly one variable name
    [var_name] = list(ds)
    return ds[var_name]


def coord_compat(da_seq, dims):
    """Check if a sequence of :class:`xarray:xarray.DataArray` have compatible coordinates.

    Parameters
    ----------
    da_seq : list(:class:`xarray:xarray.DataArray`)
        Sequence of :class:`xarray:xarray.DataArray` we would like to check for compatibility.
        :class:`xarray:xarray.Dataset` work too.
    dims : list
        Subset of all dimensions in the :class:`xarray:xarray.DataArray` we are concerned with for compatibility.

    Returns
    -------
    compat : bool
        True if all the :class:`xarray:xarray.DataArray` have compatible coordinates, that is, for every dimension in
        `dims` the coordinate values have the same shape and the same elements as those of the first element of
        `da_seq`.
    """
    if len(da_seq) <= 1:
        return True

    ref = da_seq[0]
    # The reference itself is included in the loop so that it also gets the missing-dimension check
    for da in da_seq:
        # There is probably a better way to do this by attempting concat in try-except, but good enough for now:
        for dd in dims:
            assert dd in da.coords, "dim %s missing in dataarray" % dd
            # array_equal also compares shapes. Element-wise ``==`` would broadcast (so a length-1 coordinate could
            # match a longer one) and, on recent numpy versions, raises for shapes that cannot be broadcast.
            if not np.array_equal(ref.coords[dd].values, da.coords[dd].values):
                return False
    return True


def da_to_string(da):
    """Render a 1D :class:`xarray:xarray.DataArray` as human readable text.

    Parameters
    ----------
    da : :class:`xarray:xarray.DataArray`
        The :class:`xarray:xarray.DataArray` to display. Must have exactly one dimension.

    Returns
    -------
    str_val : str
        String with a human readable version of `da`.
    """
    n_dims = len(da.dims)
    assert n_dims == 1

    # Go through the series form of the array and use its string rendering
    series = da.to_series()
    return series.to_string()


def da_concat(da_dict, dims):
    """Combine a dictionary of :class:`xarray:xarray.DataArray` in the manner of :func:`pandas:pandas.concat`.

    Parameters
    ----------
    da_dict : dict(tuple(str), :class:`xarray:xarray.DataArray`)
        Dictionary of :class:`xarray:xarray.DataArray` to combine. The keys are tuples of index values. The
        :class:`xarray:xarray.DataArray` must have compatible coordinates.
    dims : list(str)
        The names of the new dimensions created for the dictionary keys. This must be of the same length as the
        key tuples in `da_dict`.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        Combined data array. The new dimensions will be ``input_da.dims + dims``.
    """
    assert len(da_dict) > 0
    assert all(len(da.dims) > 0 for da in da_dict.values()), "0-dimensional DataArray not supported"
    assert all_unique(dims)

    # All inputs must share the same dimensions, and none of them may clash with the new dimension names
    arrays = list(da_dict.values())
    ref_dims = arrays[0].dims
    assert all(da.dims == ref_dims for da in arrays)
    assert set(ref_dims).isdisjoint(dims)

    # One series per key; length-1 key tuples are unwrapped to their single element before concatenating
    series_by_key = OrderedDict()
    for key, da in da_dict.items():
        column = key[0] if len(key) == 1 else key
        series_by_key[column] = da.to_series()
    df = pd.concat(series_by_key, axis=1)

    assert df.columns.nlevels == len(dims)
    df.columns.names = dims

    # Move every column level into the index, which must leave a series
    all_levels = list(range(df.columns.nlevels))
    stacked = df.stack(level=all_levels)
    assert isinstance(stacked, pd.Series)

    da = stacked.to_xarray()
    assert isinstance(da, xr.DataArray)
    return da


def ds_concat(ds_dict, dims):
    """Combine a dictionary of :class:`xarray:xarray.Dataset` into one dataset, in the spirit of
    :func:`pandas:pandas.concat`. This generalizes :func:`.da_concat` from data arrays to datasets.

    Parameters
    ----------
    ds_dict : dict(tuple(str), :class:`xarray:xarray.Dataset`)
        The datasets to combine, keyed by tuples of index values. The datasets must have compatible coordinates and
        must all contain the same variables.
    dims : list(str)
        Names given to the new dimensions that are built from the dictionary keys. Its length must equal the length
        of the key tuples in `ds_dict`.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        Combined dataset. For each variable `var`, the new dimensions will be ``input_ds[var].dims + dims``.
    """
    assert len(ds_dict) > 0
    assert len(dims) > 0
    assert all(len(key) == len(dims) for key in ds_dict)

    # The first entry supplies the coordinates of the output
    ref_key = next(iter(ds_dict))

    # Unpacking a one-element set fails unless every dataset lists exactly the same variables
    (var_names,) = {tuple(ds) for ds in ds_dict.values()}

    # Concatenate the data arrays of one variable at a time
    combined = xr.Dataset(coords=ds_dict[ref_key].coords)
    for name in var_names:
        per_key = OrderedDict((key, ds[name]) for key, ds in ds_dict.items())
        combined[name] = da_concat(per_key, dims)

    return combined


================================================
FILE: build_wheel.sh
================================================
#!/bin/bash
#
# Build a source distribution from a pristine checkout, stamping the current
# git commit into bayesmark/version.py. Refuses to run on a dirty work tree.

set -ex
set -o pipefail

# Log the version of the interpreter that performs the build below. This uses
# python3 (not python) so the logged version matches the build interpreter and
# the script does not abort on systems that ship no plain `python` executable.
python3 --version

# Fail if there are uncommitted or untracked files so we don't delete them in next step
test -z "$(git status --porcelain)"

# Build from clean repo, delete all ignored files
git clean -x -f -d

# Get everything in place to put inside the package: record the exact commit
SHA_LONG=$(git rev-parse HEAD)
echo "VERSION=\"$SHA_LONG\"" >bayesmark/version.py

# Now the actual build
python3 setup.py sdist


================================================
FILE: docs/.gitignore
================================================
_build


================================================
FILE: docs/Makefile
================================================
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
# SPHINXOPTS:  extra options appended to every sphinx-build invocation.
# SPHINXBUILD: the sphinx-build executable to run.
# PAPER:       selects one of the PAPEROPT_* values below (a4 or letter).
# BUILDDIR:    directory that receives all build output.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# User-friendly check for sphinx-build
# Stops with an explanatory error when the lookup of the executable exits with status 1.
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# ALLSPHINXOPTS is the common argument list: doctree cache dir, paper option, user options and the source dir (.).
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# None of the targets below produce a file of the same name, so all of them are declared phony.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck all

# Default target (first in the file): print the list of available build targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"

# Remove everything inside the build directory (the directory itself is kept).
clean:
	rm -rf $(BUILDDIR)/*

# Run the "html" builder; output goes to $(BUILDDIR)/html.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# Run the "dirhtml" builder; output goes to $(BUILDDIR)/dirhtml.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# Run the "singlehtml" builder; output goes to $(BUILDDIR)/singlehtml.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# Run the "pickle" builder; output goes to $(BUILDDIR)/pickle.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

# Run the "json" builder; output goes to $(BUILDDIR)/json.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# Run the "htmlhelp" builder; output goes to $(BUILDDIR)/htmlhelp.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# Run the "qthelp" builder; output goes to $(BUILDDIR)/qthelp. The echoed hints show follow-up commands.
# NOTE(review): the inner double quotes around qcollectiongenerator end and reopen the shell string, so
# they do not appear in the printed message.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bayesmark.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bayesmark.qhc"

# Run the "devhelp" builder; output goes to $(BUILDDIR)/devhelp. The echoed hints show how to install the result
# ($$HOME is escaped so the shell, not make, expands it).
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/bayesmark"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bayesmark"
	@echo "# devhelp"

# Run the "latex" builder; output goes to $(BUILDDIR)/latex. No PDF is produced by this target.
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# Run the "latex" builder, then invoke the all-pdf target of the Makefile inside $(BUILDDIR)/latex.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Run the "text" builder; output goes to $(BUILDDIR)/text.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# Run the "man" builder; output goes to $(BUILDDIR)/man.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# Run the "texinfo" builder; output goes to $(BUILDDIR)/texinfo. makeinfo is not run by this target.
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Run the "texinfo" builder, then invoke the info target of the Makefile inside $(BUILDDIR)/texinfo.
# The recursive call goes through $(MAKE) rather than a literal `make` so that the same make program is
# used and command-line flags (-j, -n, -k, ...) are passed on to the sub-make, matching the latexpdf target.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# Run the "gettext" builder; output goes to $(BUILDDIR)/locale. Uses I18NSPHINXOPTS, which omits the shared
# doctree directory.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# Run the "changes" builder; output goes to $(BUILDDIR)/changes.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# Run the "linkcheck" builder; the report goes to $(BUILDDIR)/linkcheck/output.txt.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# Run the "xml" builder; output goes to $(BUILDDIR)/xml.
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

# Run the "pseudoxml" builder; output goes to $(BUILDDIR)/pseudoxml.
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

# Aggregate target: depends on every build target defined in this Makefile (clean and help are not included).
all: html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck


================================================
FILE: docs/authors.rst
================================================
-------
Credits
-------

~~~~~~~~~~~~~~~~
Development lead
~~~~~~~~~~~~~~~~

Ryan Turner (rdturnermtl)

~~~~~~~~~~~~
Contributors
~~~~~~~~~~~~

* David Eriksson (dme65)


================================================
FILE: docs/code.rst
================================================
-------------
Code Overview
-------------

.. _bayesmark:

~~~~
Data
~~~~

.. automodule:: bayesmark.data
   :members:
   :exclude-members:

~~~~~~~~~~~~~~~~~~~~~~~
Expected Max Estimation
~~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.expected_max
   :members:
   :exclude-members:

~~~~~~~~~~~~~~~~~~~~~~
Experiment Aggregation
~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.experiment_aggregate
   :members:
   :exclude-members: main

~~~~~~~~~~~~~~~~~~~
Experiment Analysis
~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.experiment_analysis
   :members:
   :exclude-members: main

~~~~~~~~~~~~~~~~~~~
Experiment Baseline
~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.experiment_baseline
   :members:
   :exclude-members: main, do_baseline

~~~~~~~~~~~~~~~~~~~
Experiment Launcher
~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.experiment_launcher
   :members:
   :exclude-members: main

~~~~~~~~~~
Experiment
~~~~~~~~~~

.. automodule:: bayesmark.experiment
   :members:
   :exclude-members: experiment_main

~~~~~~~~~~~~~~~~~~~
Function Signatures
~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.signatures
   :members:
   :exclude-members:

~~~~~~~~~~
Numpy Util
~~~~~~~~~~

.. automodule:: bayesmark.np_util
   :members:
   :exclude-members:

~~~~~~~~~
Path Util
~~~~~~~~~

.. automodule:: bayesmark.path_util
   :members:
   :exclude-members:

~~~~~~~~~~~~~~~~~~~
Quantile Estimation
~~~~~~~~~~~~~~~~~~~

.. automodule:: bayesmark.quantiles
   :members:
   :exclude-members: ensure_shape

~~~~~~~~~~~~~
Random Search
~~~~~~~~~~~~~

.. automodule:: bayesmark.random_search
   :members:
   :exclude-members:

~~~~~~~~~~~~~
Serialization
~~~~~~~~~~~~~

.. automodule:: bayesmark.serialize
   :members:
   :exclude-members: Serializer

~~~~~~~~~~~~~~
Sklearn Tuning
~~~~~~~~~~~~~~

.. automodule:: bayesmark.sklearn_funcs
   :members:
   :exclude-members:

~~~~~
Space
~~~~~

.. automodule:: bayesmark.space
   :members:
   :exclude-members: check_array, unravel_index

~~~~~
Stats
~~~~~

.. automodule:: bayesmark.stats
   :members:
   :exclude-members:

~~~~~~~~~~~~~~
Util (General)
~~~~~~~~~~~~~~

.. automodule:: bayesmark.util
   :members:
   :exclude-members:

~~~~~~~~~~~
Xarray Util
~~~~~~~~~~~

.. automodule:: bayesmark.xr_util
   :members:
   :exclude-members:


================================================
FILE: docs/conf.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# bayesmark documentation build configuration file.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import sys

# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. If the directory is
# relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
# The first entry is resolved against the current working directory, the
# second against the location of this file.
sys.path.insert(0, os.path.abspath(".."))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

# Get the project root dir, which is the parent dir of the current working
# directory.
# NOTE(review): this presumably assumes the build is started from docs/ --
# confirm; in that case all three sys.path entries point at the same place.
cwd = os.getcwd()
project_root = os.path.dirname(cwd)

# Insert the project root dir as the first element in the PYTHONPATH.
# This lets us ensure that the source package is imported, and that its
# version is used.
sys.path.insert(0, project_root)

# -- General configuration ---------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "sphinx.ext.napoleon"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix of source filenames, mapped to the file type used to parse them.
# NOTE(review): ".txt" and ".md" are mapped to "markdown", but no Markdown
# parser extension is listed in `extensions` above -- confirm one is
# registered before adding such files to the docs.
source_suffix = {".rst": "restructuredtext", ".txt": "markdown", ".md": "markdown"}

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "bayesmark"
copyright = "2018-2019"

# The version info for the project you're documenting, acts as replacement
# for |version| and |release|, also used in various other places throughout
# the built documents.
#
# The short X.Y version.
# version = bayesmark.__version__
# The full version, including alpha/beta/rc tags.
# release = bayesmark.__version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to
# some non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build"]

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built
# documents.
# keep_warnings = False

# External projects whose object inventories are used to resolve cross-references such as
# :class:`xarray:xarray.DataArray`. Each entry maps the prefix used in the docstrings to
# ``(base URL, inventory location)``; ``None`` means the inventory is fetched from the base URL.
# Every inventory is fetched over HTTPS; the xarray docs are referenced at docs.xarray.dev.
intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "numpy": ("https://docs.scipy.org/doc/numpy-1.16.1/", None),
    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
    "xarray": ("https://docs.xarray.dev/en/stable/", None),
    "sklearn": ("https://scikit-learn.org/stable/", None),
}

# -- Options for HTML output -------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = "default"

# Theme options are theme-specific and customize the look and feel of a
# theme further.  For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar.  Default is the same as
# html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the
# top of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon
# of the docs.  This file should be a Windows icon file (.ico) being
# 16x16 or 32x32 pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets)
# here, relative to this directory. They are copied after the builtin
# static files, so a file named "default.css" will overwrite the builtin
# "default.css".
html_static_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page
# bottom, using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names
# to template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer.
# Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer.
# Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages
# will contain a <link> tag referring to it.  The value of this option
# must be the base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = "bayesmark_doc"


# -- Options for LaTeX output ------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass
# [howto/manual]).
latex_documents = [("index", "bayesmark.tex", "BO Benchmark Documentation", "Uber AI Labs", "manual")]

# The name of an image file (relative to this directory) to place at
# the top of the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings
# are parts, not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "bayesmark", "bayesmark Documentation", ["Uber AI Labs"], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output ----------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        "index",
        "bayesmark",
        "bayesmark Documentation",
        "Uber AI Labs",
        "bayesmark",
        "Benchmark of Bayesian optimization packages on real problems.",
        "Miscellaneous",
    )
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False


================================================
FILE: docs/dummy.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sphinx

# Reference each extra dependency so that pipreqs detects it and flake8 does not report an unused import
extra_packages = (sphinx,)
for package in extra_packages:
    print("%s %s" % (package.__name__, package.__version__))


================================================
FILE: docs/index.rst
================================================
.. bayesmark documentation master file, created by
   sphinx-quickstart on Tue Jul  9 22:26:36 2013.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to the Bayes Opt Benchmark Documentation
================================================

Contents:

.. toctree::
   :maxdepth: 2

   readme
   scoring
   code
   authors


================================================
FILE: docs/readme.rst
================================================
.. include:: ../README.rst


================================================
FILE: docs/scoring.rst
================================================
.. _how-scoring-works:

How scoring works
=================

The scoring system is about aggregating the function evaluations of the optimizers. We represent :math:`F_{pmtn}` as the function evaluation of objective function :math:`p` (``TEST_CASE``) from the suggestion of method :math:`m` (``METHOD``) at batch :math:`t` (``ITER``) under repeated trial :math:`n` (``TRIAL``). In the case of batch sizes greater than 1, :math:`F_{pmtn}` is the minimum function evaluation across the suggestions in batch :math:`t`. The first transformation is that we consider the *cumulative minimum* over batches :math:`t` as the performance of the optimizer on a particular trial:

.. math::

   S_{pmtn} = \textrm{cumm-min}_t F_{pmtn}\,.

All of the aggregate quantities described here are computed by :func:`.experiment_analysis.compute_aggregates` (which is called by `bayesmark-anal <#analyze-and-summarize-results>`_) in either the ``agg_result`` or ``summary`` xarray datasets. Additionally, the baseline performances are in the xarray dataset ``baseline_ds`` from :func:`.experiment_baseline.compute_baseline`. The baseline dataset can be generated via the ``bayesmark-baseline`` command, but it is called automatically by ``bayesmark-anal`` if needed.

Median scores
-------------

The more robust, but less decision-theoretically appealing method for aggregation is to look at median scores. On a per problem basis we simply consider the median (``agg_result[PERF_MED]``):

.. math::

   \textrm{med-perf}_{pmt} = \textrm{median}_n \, S_{pmtn} \,.

However, this score is not very comparable across different problems as the objectives are all on different scales with possibly different units. Therefore, we define the *normalized score* (``agg_result[NORMED_MED]``) in a way that is *invariant* to linear transformation of the objective function:

.. math::

   \textrm{norm-med-perf}_{pmt} = \frac{\textrm{med-perf}_{pmt}  - \textrm{opt}_p}{\textrm{rand-med-perf}_{pt} - \textrm{opt}_p} \,,

where :math:`\textrm{opt}_p` (``baseline_ds[PERF_BEST]``) is an estimate of the global minimum of objective function :math:`p`; and :math:`\textrm{rand-med-perf}_{pt}` is the median performance of random search at batch :math:`t` on objective function :math:`p`. This means that, on any objective, an optimizer has score 0 after converging to the global minimum; and random search performs as a straight line at 1 for all :math:`t`. Conceptually, the median random search performance (``baseline_ds[PERF_MED]``) is computed as:

.. math::

   \textrm{rand-med-perf}_{pt} = \textrm{median}_n \, S_{pmtn} \,,

with :math:`m=` random search. However, every observation of :math:`F_{pmtn}` is iid in the case of random search. There is no reason to break the samples apart into trials :math:`n`. Instead, we use the function :func:`.quantiles.min_quantile_CI` to compute a more statistically efficient pooled estimator using the pooled random search samples over :math:`t` and :math:`n`. This pooled method is a nonparametric estimator of the quantiles of the minimum over a batch of samples, which is distribution free.

To further aggregate the performance over all objectives for a single optimizer we can consider the median-of-medians (``summary[PERF_MED]``):

.. math::

   \textrm{med-perf}_{mt} = \textrm{median}_p \, \textrm{norm-med-perf}_{pmt} \,.

Combining scores across different problems is sensible here because we have transformed them all onto the same scale.

Mean scores
-----------

From a decision theoretical perspective it is more sensible to consider the mean (possibly warped) score. The median score can hide a high percentage of runs that completely fail. However, when we look at the mean score we first take the clipped score with a baseline value:

.. math::

   S'_{pmtn} = \min(S_{pmtn}, \textrm{clip}_p) \,.

This is largely because there may be a non-zero probability of :math:`F = \infty` (as in when the objective function crashes), which would make the mean random search performance an infinite loss. We set :math:`\textrm{clip}_p` (``baseline_ds[PERF_CLIP]``) to the median score after a single function evaluation, which is :math:`\textrm{rand-med-perf}_{p0}` for a batch size of 1. The mean performance on a single problem (``agg_result[PERF_MEAN]``) then becomes:

.. math::

   \textrm{mean-perf}_{pmt} = \textrm{mean}_n \, S'_{pmtn} \,.

Which then becomes a normalized performance (``agg_result[NORMED_MEAN]``) of:

.. math::

   \textrm{norm-mean-perf}_{pmt} = \frac{\textrm{mean-perf}_{pmt}  - \textrm{opt}_p}{\textrm{clip}_p  - \textrm{opt}_p} \,.

Note here that the random search performance is only 1 at the first batch, unlike for :math:`\textrm{norm-med-perf}_{pmt}`.

Again we can aggregate this into all objective function performance with (``summary[PERF_MEAN]``):

.. math::

   \textrm{mean-perf}_{mt} = \textrm{mean}_p \, \textrm{norm-mean-perf}_{pmt} \,,

which is a mean-of-means (or *grand mean*), which is much more sensible in general than a median-of-medians. We can again obtain the property of random search having a constant performance of 1 for all :math:`t` using (``summary[NORMED_MEAN]``):

.. math::

   \textrm{norm-mean-perf}_{mt} = \frac{\textrm{mean-perf}_{mt}}{\textrm{rand-mean-perf}_{t}} \,,

where the random search baseline has been determined with the same sequence of equations as the other methods. These all collapse down to:

.. math::

   \textrm{rand-mean-perf}_{t} = \textrm{mean}_p \, \frac{\textrm{rand-mean-perf}_{pt} - \textrm{opt}_p}{\textrm{clip}_p  - \textrm{opt}_p} \,.

Conceptually, we compute this random search baseline (``baseline_ds[PERF_MEAN]``) as:

.. math::

   \textrm{rand-mean-perf}_{pt} = \textrm{mean}_n \, S'_{pmtn} \,,

with :math:`m=` random search. However, because all function evaluations for random search are iid across :math:`t`, we can use a more statistically efficient pooled estimator :func:`.expected_max.expected_min`, which is an unbiased distribution free estimator on the expected minimum of :math:`m` samples from a distribution.

Note that :math:`\textrm{norm-mean-perf}_{mt}` is, in aggregate, a linear transformation on the expected loss :math:`S'`. This makes it more justified in a decision theory framework than the median score. However, to view it as a linear transformation we are considering the values in ``baseline_ds`` to be fixed reference loss values and not the output from the experiment.

Error bars
----------

The datasets ``agg_result`` and ``summary`` also compute error bars in the form of ``LB_`` and ``UB_`` variables. These error bars do not consider the random variation in the baseline quantities from ``baseline_ds`` like ``opt`` and ``clip``. They are instead treated as fixed constant reference points. Therefore, they are computed by a different command ``bayesmark-baseline``. The user can generate the baselines when they want, but since they are not considered a random quantity in the statistics they are not automatically generated from the experimental data (unless the baseline file ``derived/baseline.json`` is missing).

Additionally, the error bars on the grand mean (``summary[PERF_MEAN]``) are computed by simply using t-statistic based error bars on the individual means. Under a "random effects" model, this does not actually lose any statistical power. However, this is computing the mean on the loss over sampling from new problems under the "same distribution" of benchmark problems. These error bars will be wider than if we computed the error bars on the grand mean over this particular set of benchmark problems.


================================================
FILE: example_opt_root/config.json
================================================
{
    "Flaky": [
        "flaky_optimizer.py",
        {}
    ],
    "HyperOpt-New": [
        "hyperopt_optimizer.py",
        {}
    ],
    "Nevergrad-OnePlusOne-New": [
        "nevergrad_optimizer.py",
        {
            "budget": 300,
            "tool": "OnePlusOne"
        }
    ],
    "OpenTuner-BanditA-New": [
        "opentuner_optimizer.py",
        {
            "techniques": [
                "AUCBanditMetaTechniqueA"
            ]
        }
    ],
    "OpenTuner-GA-DE-New": [
        "opentuner_optimizer.py",
        {
            "techniques": [
                "PSO_GA_DE"
            ]
        }
    ],
    "OpenTuner-GA-New": [
        "opentuner_optimizer.py",
        {
            "techniques": [
                "PSO_GA_Bandit"
            ]
        }
    ],
    "PySOT-New": [
        "pysot_optimizer.py",
        {}
    ],
    "RandomSearch-New": [
        "random_optimizer.py",
        {}
    ],
    "Scikit-GBRT-Hedge-New": [
        "scikit_optimizer.py",
        {
            "acq_func": "gp_hedge",
            "base_estimator": "GBRT",
            "n_initial_points": 5
        }
    ],
    "Scikit-GP-Hedge-New": [
        "scikit_optimizer.py",
        {
            "acq_func": "gp_hedge",
            "base_estimator": "GP",
            "n_initial_points": 5
        }
    ],
    "Scikit-GP-LCB-New": [
        "scikit_optimizer.py",
        {
            "acq_func": "LCB",
            "base_estimator": "GP",
            "n_initial_points": 5
        }
    ]
}


================================================
FILE: example_opt_root/flaky_optimizer.py
================================================
from time import sleep

import bayesmark.random_search as rs
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main


class FlakyOptimizer(AbstractOptimizer):
    def __init__(self, api_config, random=np_util.random):
        """Set up a random-search optimizer that deliberately misbehaves.

        A behavior mode ("normal", "crash" or "delay") is drawn once from
        `random` at construction time and steers what `suggest` does later.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random : random-state like
            Source of randomness. It must offer `choice` and `rand`, and it is
            also forwarded to the random search routine in `suggest`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random
        self.mode = self.random.choice(["normal", "crash", "delay"])

    def suggest(self, n_suggestions=1):
        """Produce random suggestions, possibly after stalling or crashing.

        A random number is drawn on every call. When it is at most 0.5, or
        when the optimizer is in "normal" mode, plain random search is used.
        Otherwise the call sleeps for 15 minutes first ("delay" mode) or
        fails with an assertion error ("crash" mode).

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # The draw always happens first, so the random stream is consumed on
        # every call regardless of the mode.
        behave_normally = self.random.rand() <= 0.5 or self.mode == "normal"

        if not behave_normally:
            if self.mode == "delay":
                sleep(15 * 60)  # 15 minutes
            elif self.mode == "crash":
                assert False, "Crashing for testing purposes"
            else:
                assert False, "Crashing, not for testing purposes"

        return rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

    def observe(self, X, y):
        """Accept observations and discard them.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Suggestions come from random search, so there is nothing to learn.
        return None


# When run as a script, pass this optimizer class to `experiment_main`.
if __name__ == "__main__":
    experiment_main(FlakyOptimizer)


================================================
FILE: example_opt_root/hyperopt_optimizer.py
================================================
import numpy as np
from hyperopt import hp, tpe
from hyperopt.base import JOB_STATE_DONE, JOB_STATE_NEW, STATUS_OK, Domain, Trials
from scipy.interpolate import interp1d

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from bayesmark.np_util import random as np_random
from bayesmark.np_util import random_seed

# Cast applied to each suggested value in `cleanup_guess`, keyed by the
# parameter's API type. Sklearn prefers str to unicode:
DTYPE_MAP = {"real": float, "int": int, "bool": bool, "cat": str, "ordinal": str}


def dummy_f(x):
    """Placeholder objective passed to hyperopt's `Domain`.

    The wrapper only uses hyperopt to generate suggestions, so this function
    is not supposed to be evaluated; calling it fails an assertion.
    """
    assert False, "This is a placeholder, it should never be called."


def only(x):
    """Return the sole element of the length-one iterable `x`.

    The unpacking raises `ValueError` when `x` does not contain exactly one
    element.
    """
    [element] = x
    return element


class HyperoptOptimizer(AbstractOptimizer):
    primary_import = "hyperopt"

    def __init__(self, api_config, random=np_random):
        """Wrap the hyperopt TPE optimizer for use in the benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random : random-state like
            Source of randomness used to derive the seed given to hyperopt on
            every suggestion.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

        hp_space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config)
        self.domain = Domain(dummy_f, hp_space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Book keeping (like the opentuner wrapper): hashable suggestion ->
        # hyperopt trial ID, for suggestions that were not observed yet.
        self.trial_id_lookup = {}

        # Only kept to validate the keys of guesses coming out of hyperopt.
        self.param_set_chk = frozenset(api_config.keys())

    @staticmethod
    def hashable_dict(d):
        """Turn a dictionary into something that can be used as a dict key.

        Parameters
        ----------
        d : dict or dict-like
            The dictionary to be converted to immutable/hashable type.

        Returns
        -------
        hashable_object : frozenset of tuple pairs
            The (key, value) pairs of `d` collected in a frozenset.
        """
        return frozenset(d.items())

    @staticmethod
    def get_hyperopt_dimensions(api_config):
        """Translate `api_config` into a hyperopt search space.

        Static (takes `api_config` as argument) so it can be used from the
        constructor.

        Returns
        -------
        space : dict
            Maps each parameter name to its hyperopt expression.
        round_to_values : dict
            Maps each numeric parameter that was specified through a list of
            allowed values to a nearest-neighbor interpolator that snaps a
            number onto that list.
        """
        space = {}
        round_to_values = {}

        # Iteration order probably makes no difference, but sort anyway to be
        # safe and consistent with space.py.
        for name in sorted(api_config.keys()):
            cfg = api_config[name]
            kind = cfg["type"]

            warp = cfg.get("space", None)
            bounds = cfg.get("range", None)
            grid = cfg.get("values", None)

            # A whitelist of values on a numeric type: search over the range
            # spanned by the values and round back onto them afterwards.
            if grid is not None and kind not in ("cat", "ordinal"):
                assert bounds is None
                grid = np.unique(grid)
                bounds = (grid[0], grid[-1])
                round_to_values[name] = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")

            if kind == "real":
                low, high = bounds
                if warp in ("log", "logit"):
                    space[name] = hp.loguniform(name, np.log(low), np.log(high))
                else:
                    space[name] = hp.uniform(name, low, high)
            elif kind == "int":
                low, high = bounds
                if warp in ("log", "logit"):
                    space[name] = hp.qloguniform(name, np.log(low), np.log(high), 1)
                else:
                    space[name] = hp.quniform(name, low, high, 1)
            elif kind in ("cat", "ordinal"):
                assert bounds is None
                space[name] = hp.choice(name, grid)
            elif kind == "bool":
                assert bounds is None
                assert grid is None
                space[name] = hp.choice(name, (False, True))
            else:
                assert False, "type %s not handled in API" % kind

        return space, round_to_values

    def get_trial(self, trial_id):
        """Look up the pending hyperopt trial document with ID `trial_id`.

        Fails an assertion when no trial has that ID, or when the matching
        trial is not in the new (unevaluated) state.
        """
        for trial in self.trials._dynamic_trials:
            if trial["tid"] != trial_id:
                continue
            assert isinstance(trial, dict)
            # Make sure it is the right kind of dict
            assert "state" in trial and "result" in trial
            assert trial["state"] == JOB_STATE_NEW
            return trial
        assert False, "No matching trial ID"

    def cleanup_guess(self, x_guess):
        """Convert the `vals` dict of a hyperopt trial into an API suggestion.

        Every value is unpacked from its singleton container, rounded onto the
        allowed values where such a list was configured, and cast to the
        Python type matching the parameter's API type.
        """
        assert isinstance(x_guess, dict)
        # The keys must be exactly the variables we are searching over
        assert frozenset(x_guess.keys()) == self.param_set_chk

        # Builds a new dict, so the input is left untouched. We may also need
        # to consider clip_chk at some point like opentuner.
        cleaned = {}
        for name, packed in x_guess.items():
            value = only(packed)
            round_f = self.round_to_values.get(name)
            if round_f is not None:
                value = round_f(value)
            # Ensure this is the correct dtype so sklearn is happy
            cleaned[name] = DTYPE_MAP[self.api_config[name]["type"]](value)
        return cleaned

    def _suggest(self):
        """Ask hyperopt's TPE for one new trial and register it in
        `self.trials`.
        """
        trial_ids = self.trials.new_trial_ids(1)
        assert len(trial_ids) == 1
        self.trials.refresh()

        docs = tpe.suggest(trial_ids, self.domain, self.trials, random_seed(self.random))
        assert len(docs) == 1

        self.trials.insert_trial_docs(docs)
        self.trials.refresh()

        [doc] = docs  # extract singleton
        return doc

    def suggest(self, n_suggestions=1):
        """Make `n_suggestions` suggestions for what to evaluate next.

        This requires the user observe all previous suggestions before calling
        again.

        Parameters
        ----------
        n_suggestions : int
            The number of suggestions to return.

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # It seems hyperopt either uses random search or guesses one at a time
        # anyway, so we might as well call it serially. All trials are created
        # before any of them is post-processed.
        fresh_trials = []
        for _ in range(n_suggestions):
            fresh_trials.append(self._suggest())

        X = []
        for doc in fresh_trials:
            guess = self.cleanup_guess(doc["misc"]["vals"])

            # Remember which trial produced this guess so that `observe` can
            # find the original trial object again.
            key = HyperoptOptimizer.hashable_dict(guess)
            assert key not in self.trial_id_lookup, "the suggestions should not already be in the trial dict"
            self.trial_id_lookup[key] = doc["tid"]
            X.append(guess)

        assert len(X) == n_suggestions
        return X

    def observe(self, X, y):
        """Feed the observations back to hyperopt.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated.
        """
        assert len(X) == len(y)

        for guess, value in zip(X, y):
            key = HyperoptOptimizer.hashable_dict(guess)
            assert key in self.trial_id_lookup, "Appears to be guess that did not originate from suggest"

            trial = self.get_trial(self.trial_id_lookup.pop(key))
            assert self.cleanup_guess(trial["misc"]["vals"]) == guess, "trial ID not consistent with x values stored"

            # Cast to float to ensure native type
            loss = float(value)
            trial["state"] = JOB_STATE_DONE
            trial["result"] = {"loss": loss, "status": STATUS_OK}

        # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end
        # of loop of a bunch of evals, so we will do the same thing here.
        self.trials.refresh()


# When run as a script, pass this optimizer class to `experiment_main`.
if __name__ == "__main__":
    experiment_main(HyperoptOptimizer)


================================================
FILE: example_opt_root/nevergrad_optimizer.py
================================================
import nevergrad.optimization as optimization
import numpy as np
from nevergrad import instrumentation as inst
from scipy.stats import norm

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from bayesmark.np_util import linear_rescale
from bayesmark.space import Real


class NevergradOptimizer(AbstractOptimizer):
    primary_import = "nevergrad"

    def __init__(self, api_config, tool="OnePlusOne", budget=300):
        """Wrap a nevergrad optimizer for use in the benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        tool : str
            Key of the optimizer class to look up in the nevergrad
            optimization registry.
        budget : int
            Expected number of max function evals
        """
        AbstractOptimizer.__init__(self, api_config)

        self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)
        self.optim = optimization.registry[tool](dimension=self.instrum.dimension, budget=budget)

    @staticmethod
    def _nvg_variable(param_config):
        """Build the nevergrad variable for a single parameter.

        Returns a pair `(arg, prewarp)`: the nevergrad variable, and either
        the `Real` space needed to map it to and from the Gaussian
        representation or `None` when no such mapping is required.
        """
        kind = param_config["type"]

        warp = param_config.get("space", None)
        bounds = param_config.get("range", None)
        grid = param_config.get("values", None)

        if kind == "cat":
            assert warp is None
            assert bounds is None
            return inst.var.SoftmaxCategorical(grid), None

        if kind == "bool":
            assert warp is None
            assert bounds is None
            assert grid is None
            return inst.var.OrderedDiscrete([False, True]), None

        if grid is not None:
            assert kind in ("int", "ordinal", "real")
            # We are throwing away information here, but OrderedDiscrete
            # appears to be invariant to monotonic transformation anyway.
            return inst.var.OrderedDiscrete(grid), None

        if kind == "int":
            assert grid is None
            # Need +1 since the API range is inclusive
            choices = range(int(bounds[0]), int(bounds[-1]) + 1)
            # We are throwing away information here, but OrderedDiscrete
            # appears to be invariant to monotonic transformation anyway.
            return inst.var.OrderedDiscrete(choices), None

        if kind == "real":
            assert grid is None
            assert bounds is not None
            # The warping into the parameter's own space is done separately.
            return inst.var.Gaussian(mean=0, std=1), Real(warp=warp, range_=bounds)

        assert False, "type %s not handled in API" % kind

    @staticmethod
    def get_nvg_dimensions(api_config):
        """Translate `api_config` into a nevergrad instrumentation.

        Static (takes `api_config` as argument) so it can be used from the
        constructor.

        Returns
        -------
        instrum : Instrumentation
            Nevergrad instrumentation with one keyword variable per parameter.
        all_prewarp : dict
            Maps each parameter name to its `Real` space, or to `None` for
            parameters that need no extra warping.
        """
        all_args = {}
        all_prewarp = {}
        # Iteration order probably makes no difference, but sort anyway to be
        # safe and consistent with space.py.
        for param_name in sorted(api_config.keys()):
            arg, prewarp = NevergradOptimizer._nvg_variable(api_config[param_name])
            all_args[param_name] = arg
            all_prewarp[param_name] = prewarp

        return inst.Instrumentation(**all_args), all_prewarp

    def prewarp(self, xx):
        """Map a suggestion into the Gaussian space representation.

        Only parameters that have a `Real` space stored in `self.space` are
        transformed; all other values pass through unchanged.
        """
        warped = {}
        for name, value in xx.items():
            assert np.isscalar(value)
            real_space = self.space[name]

            if real_space is not None:
                # Warp so we think it is apriori uniform in [a, b]
                value = real_space.warp(value)
                assert value.size == 1

                # Now make uniform on [0, 1], also unpack warped to scalar
                [(lb, ub)] = real_space.get_bounds()
                value = linear_rescale(value.item(), lb, ub, 0, 1)

                # Now make std Gaussian apriori
                value = norm.ppf(value)
            assert np.isscalar(value)
            warped[name] = value
        return warped

    def postwarp(self, xxw):
        """Undo the Gaussian space representation (inverse of `prewarp`).

        Only parameters that have a `Real` space stored in `self.space` are
        transformed; all other values pass through unchanged.
        """
        unwarped = {}
        for name, value in xxw.items():
            assert np.isscalar(value)
            real_space = self.space[name]

            if real_space is not None:
                # Push the Gaussian value through the normal CDF onto [0, 1]
                value = norm.cdf(value)

                # Rescale from [0, 1] to the bounds of the warped space
                [(lb, ub)] = real_space.get_bounds()
                value = linear_rescale(value, 0, 1, lb, ub)

                # Unwarp to get back to the original parameter space
                value = real_space.unwarp([value])
            assert np.isscalar(value)
            unwarped[name] = value
        return unwarped

    def suggest(self, n_suggestions=1):
        """Get suggestion from nevergrad.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # Ask for all points first, then convert them one by one.
        asked = []
        for _ in range(n_suggestions):
            asked.append(self.optim.ask())

        x_guess = []
        for data in asked:
            positional, keyword = self.instrum.data_to_arguments(data)
            assert positional == ()
            x_guess.append(self.postwarp(keyword))

        return x_guess

    def observe(self, X, y):
        """Feed an observation back to nevergrad.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        for point, value in zip(X, y):
            data = self.instrum.arguments_to_data(**self.prewarp(point))
            self.optim.tell(data, value)


# When run as a script, pass this optimizer class to `experiment_main`.
if __name__ == "__main__":
    experiment_main(NevergradOptimizer)


================================================
FILE: example_opt_root/opentuner_optimizer.py
================================================
"""
In opentuner, many search techniques are already available. All the names of
the techniques can be found as follows:
```
>>> import opentuner
>>> techniques, generators = opentuner.search.technique.all_techniques()
>>> for t in techniques:
...     print(t.name)
```
A user can also create new search techniques
(http://opentuner.org/tutorial/techniques/).

Opentuner will create a multi-arm bandit of multiple techniques if more than
one technique is specified in `args.technique`.

Some bandits with pre-defined techniques are already registered in:
`opentuner.search.bandittechniques`

By default, we use a pre-defined bandit called `'AUCBanditMetaTechniqueA'` of 4
techniques:
```
register(AUCBanditMetaTechnique([
        differentialevolution.DifferentialEvolutionAlt(),
        evolutionarytechniques.UniformGreedyMutation(),
        evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3),
        simplextechniques.RandomNelderMead()],
        name='AUCBanditMetaTechniqueA'))
```
The other two bandits used in our experiments are: PSO_GA_DE and PSO_GA_Bandit.
Specifying a list of multiple techniques will use a multi-arm bandit over them.
"""
import warnings
from argparse import Namespace

import opentuner.tuningrunmain
from opentuner.api import TuningRunManager
from opentuner.measurement.interface import DefaultMeasurementInterface as DMI
from opentuner.resultsdb.models import DesiredResult, Result
from opentuner.search.manipulator import (
    ConfigurationManipulator,
    EnumParameter,
    FloatParameter,
    IntegerParameter,
    LogFloatParameter,
    LogIntegerParameter,
    ScaledNumericParameter,
)

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from bayesmark.np_util import clip_chk

# Techniques used when the caller does not specify any.
DEFAULT_TECHNIQUES = ("AUCBanditMetaTechniqueA",)
# Database setting handed to opentuner so it uses an in-memory sqlite database.
MEMORY_ONLY_DB = "sqlite://"

# Monkey patch here! Opentuner is messed up, TuningRunMain changes global log
# settings. We should file an issue report and have them fix it. Until then,
# replace its logging setup with a function that does nothing.
opentuner.tuningrunmain.init_logging = lambda: None


def ClippedParam(cls, epsilon=1e-5):
    """Derive a numerically stable variant of an opentuner parameter class.

    The returned class behaves like `cls`, except that the result of
    `_unscale` is passed through `clip_chk` so that it stays within
    `[min_value, max_value]` despite numerical errors.

    `cls` must build on the `ScaledNumericParameter` abstract class defined in
    `opentuner.search.manipulator.ScaledNumericParameter`.

    Parameters
    ----------
    cls : ScaledNumericParameter
        Opentuner parameter class, such as `LogFloatParameter` or
        `FloatParameter`, which transforms the domain of parameter.
    epsilon : float
        Accepted as an argument but not used anywhere in this function.

    Returns
    -------
    StableClass : ScaledNumericParameter
        Subclass of `cls` whose `_unscale` method enforces the clip check.
    """
    is_scaled_param = issubclass(cls, ScaledNumericParameter)
    assert is_scaled_param, "this class cls should inherit from the ScaledNumericParameter class"

    class StableClass(cls):
        def _unscale(self, v):
            raw_value = super(StableClass, self)._unscale(v)
            return clip_chk(raw_value, self.min_value, self.max_value)

    return StableClass


class OpentunerOptimizer(AbstractOptimizer):
    primary_import = "opentuner"

    def __init__(self, api_config, techniques=DEFAULT_TECHNIQUES, n_suggestions=1):
        """Wrap the opentuner optimizer for use in the benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        techniques : iterable of strings
            A list or tuple of techniques to use in opentuner. If the list
            has only one technique, then that technique will be used. If the
            list has multiple techniques a bandit over those techniques
            will be used.

        n_suggestions : int
            Default number of suggestions to be made in parallel.
        """
        AbstractOptimizer.__init__(self, api_config)

        # Arguments for opentuner. All possible arguments can be listed with:
        #
        #     >>> import opentuner
        #     >>> opentuner.default_argparser().parse_args(['-h'])
        #
        # Only a few of them differ from the defaults:
        # * database = MEMORY_ONLY_DB: to use an in-memory sqlite database
        # * parallelism = n_suggestions: num of suggestions to give in parallel
        # * technique = techniques: a list of techniques to be used by opentuner
        # * print_params = False: to avoid opentuner from exiting after
        #   printing param spaces
        args = Namespace(
            bail_threshold=500,
            database=MEMORY_ONLY_DB,
            display_frequency=10,
            generate_bandit_technique=False,
            label=None,
            list_techniques=False,
            machine_class=None,
            no_dups=False,
            parallel_compile=False,
            parallelism=n_suggestions,
            pipelining=0,
            print_params=False,
            print_search_space_size=False,
            quiet=False,
            results_log=None,
            results_log_details=None,
            seed_configuration=[],
            stop_after=None,
            technique=techniques,
            test_limit=5000,
        )

        # Helper objects opentuner needs before it can actually run.
        manipulator = OpentunerOptimizer.build_manipulator(api_config)
        self.api = TuningRunManager(DMI(args=args, manipulator=manipulator), args)

        # Opentuner wants the DesiredResult of a suggestion back when its
        # observation is reported, so map each (hashable) suggestion to it.
        self.x_to_dr = {}
        # Last suggested x, repeated whenever opentuner gives up.
        self.dummy_suggest = None

    @staticmethod
    def hashable_dict(d):
        """Turn a dictionary into something that can be used as a dict key.

        Parameters
        ----------
        d : dict or dict-like
            The dictionary to be converted to immutable/hashable type.

        Returns
        -------
        hashable_object : frozenset of tuple pairs
            The (key, value) pairs of `d` collected in a frozenset.
        """
        return frozenset(d.items())

    @staticmethod
    def build_manipulator(api_config):
        """Build a ConfigurationManipulator object to be used by opentuner.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.

        Returns
        -------
        manipulator : ConfigurationManipulator
            Manipulator holding one opentuner parameter per API variable.
        """
        manipulator = ConfigurationManipulator()

        for pname in api_config:
            pconfig = api_config[pname]
            ptype = pconfig["type"]
            pspace = pconfig.get("space", None)
            pmin, pmax = pconfig.get("range", (None, None))

            if ptype in ("real", "int"):
                is_real = ptype == "real"
                if pspace in ("linear", "logit"):
                    param_cls = FloatParameter if is_real else IntegerParameter
                elif pspace in ("log", "bilog"):
                    # Only the float version gets the clip-checked wrapper.
                    param_cls = ClippedParam(LogFloatParameter) if is_real else LogIntegerParameter
                else:
                    assert False, "unsupported param space = %s" % pspace
                ot_param = param_cls(pname, pmin, pmax)
            elif ptype == "bool":
                # The actual bool parameter seems not to work in Py3 :(
                ot_param = IntegerParameter(pname, 0, 1)
            elif ptype in ("cat", "ordinal"):
                # Treat ordinal and categorical variables the same for now.
                assert "values" in pconfig
                ot_param = EnumParameter(pname, pconfig["values"])
            else:
                assert False, "type=%s/space=%s not handled in opentuner yet" % (ptype, pspace)

            manipulator.add_parameter(ot_param)
        return manipulator

    def suggest(self, n_suggestions=1):
        """Make `n_suggestions` suggestions for what to evaluate next.

        This requires the user observe all previous suggestions before calling
        again.

        Parameters
        ----------
        n_suggestions : int
            The number of suggestions to return.

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # Keep opentuner's parallelism in sync with the requested batch size.
        driver_args = self.api.search_driver.args
        if driver_args.parallelism != n_suggestions:
            driver_args.parallelism = n_suggestions
            warnings.warn("n_suggestions changed across suggest calls")

        # All previous suggestions must have been observed already, otherwise
        # opentuner will just recycle old suggestions.
        assert len(self.x_to_dr) == 0, "all the previous suggestions should have been observed by now"

        # The real meat of suggest from opentuner: get the next
        # `n_suggestions` unique suggestions.
        desired_results = []
        for _ in range(n_suggestions):
            desired_results.append(self.api.get_next_desired_result())

        X = []
        for dr in desired_results:
            if dr is None:
                # Opentuner can give up, but the API requires guessing
                # forever, so fall back to the dummy suggestion.
                assert self.dummy_suggest is not None, "opentuner gave up on the first call!"
                X.append(self.dummy_suggest)
            else:
                # The simple dict equivalent to the suggestion.
                guess = dr.configuration.data
                X.append(guess)

                # Store the DesiredResult since observe will need it.
                key = OpentunerOptimizer.hashable_dict(guess)
                assert key not in self.x_to_dr, "the suggestions should not already be in the x_to_dr dict"
                self.x_to_dr[key] = dr
                # This will also catch None from opentuner.
                assert isinstance(self.x_to_dr[key], DesiredResult)

        assert len(X) == n_suggestions, "incorrect number of suggestions provided by opentuner"

        # Remember a suggestion to repeat if opentuner gives up next time.
        # This is only possible while the dummy is not in use itself, because
        # observe checks guesses against dummy_suggest.
        used_dummy = any(dr is None for dr in desired_results)
        if not used_dummy:
            self.dummy_suggest = X[-1]
        return X

    def observe(self, X, y):
        """Feed the observations back to opentuner.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated.
        """
        assert len(X) == len(y)

        for guess, value in zip(X, y):
            key = OpentunerOptimizer.hashable_dict(guess)

            if key in self.x_to_dr:
                # Retrieve the DesiredResult stored by suggest.
                dr = self.x_to_dr.pop(key, None)
                # This will also catch None from opentuner.
                assert isinstance(dr, DesiredResult), "DesiredResult object not available in x_to_dr"

                # Opentuner's arg names assume we are minimizing execution
                # time. So, to minimize y we have to pretend it is a 'time'.
                self.api.report_result(dr, Result(time=value))
            else:
                # Without a stored DesiredResult this must be the dummy
                # guess, for which there is nothing to report.
                assert guess == self.dummy_suggest, "Appears to be guess that did not originate from suggest"


# When run as a script, pass this optimizer class to `experiment_main`.
if __name__ == "__main__":
    experiment_main(OpentunerOptimizer)


================================================
FILE: example_opt_root/pysot_optimizer.py
================================================
import warnings
from copy import copy

import numpy as np
from poap.strategy import EvalRecord
from pySOT.experimental_design import SymmetricLatinHypercube
from pySOT.optimization_problems import OptimizationProblem
from pySOT.strategy import SRBFStrategy
from pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from bayesmark.space import JointSpace


class PySOTOptimizer(AbstractOptimizer):
    """Benchmark wrapper that drives a pySOT `SRBFStrategy` via suggest/observe.

    Bookkeeping invariant: `self.history[i]` is the unwarped point handed to
    the caller for `self.proposals[i]`. `_observe` looks a point up in
    `self.history` and uses that index into `self.proposals`, so the two lists
    must only ever be appended to, popped from, or reset together.
    """

    primary_import = "pysot"

    def __init__(self, api_config):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)

        self.space_x = JointSpace(api_config)
        self.bounds = self.space_x.get_bounds()
        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int
        # The strategy is created lazily on the first call to `suggest`,
        # because the design size depends on the requested batch size.
        self.batch_size = None
        self.history = []
        self.proposals = []

    def create_opt_prob(self):
        """Create an optimization problem object.

        The problem is stored in `self.opt`. All variables are declared as
        continuous and no objective function is attached (`objfun = None`).
        """
        opt = OptimizationProblem()
        opt.lb = self.bounds[:, 0]  # In warped space
        opt.ub = self.bounds[:, 1]  # In warped space
        opt.dim = len(self.bounds)
        opt.cont_var = np.arange(len(self.bounds))
        opt.int_var = []
        assert len(opt.cont_var) + len(opt.int_var) == opt.dim
        opt.objfun = None
        self.opt = opt

    def start(self, max_evals):
        """Starts a new pySOT run.

        Clears the pending-point bookkeeping and builds a fresh strategy in
        `self.strategy`.

        Parameters
        ----------
        max_evals : int
            Currently unused: the strategy is always built with
            `self.max_evals`. The argument is kept so existing callers keep
            working.
        """
        # Reset both lists together so they stay index-aligned.
        self.history = []
        self.proposals = []

        # Symmetric Latin hypercube design
        des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])
        slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)

        # Warped RBF interpolant
        rbf = RBFInterpolant(
            dim=self.opt.dim,
            lb=self.opt.lb,
            ub=self.opt.ub,
            kernel=CubicKernel(),
            tail=LinearTail(self.opt.dim),
            eta=1e-4,
        )

        # Optimization strategy
        self.strategy = SRBFStrategy(
            max_evals=self.max_evals,
            opt_prob=self.opt,
            exp_design=slhd,
            surrogate=rbf,
            asynchronous=True,
            batch_size=1,
            use_restarts=True,
        )

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """

        if self.batch_size is None:  # First call to suggest
            self.batch_size = n_suggestions
            self.start(self.max_evals)

        # Set the tolerances pretending like we are running batch
        d, p = float(self.opt.dim), float(n_suggestions)
        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))

        # Now we can make suggestions.
        # NOTE: self.proposals is deliberately NOT cleared here. Entries that
        # were never completed (e.g. points whose observation was non-finite
        # and therefore skipped in `observe`) remain in self.history; clearing
        # only self.proposals would shift the indices and make `_observe`
        # complete the wrong proposal or raise an IndexError.
        x_w = []
        for _ in range(n_suggestions):
            proposal = self.strategy.propose_action()
            record = EvalRecord(proposal.args, status="pending")
            proposal.record = record
            proposal.accept()  # This triggers all the callbacks

            # It is possible that pySOT proposes a previously evaluated point
            # when all variables are integers, so we just abort in this case
            # since we have likely converged anyway. See PySOT issue #30.
            x = list(proposal.record.params)  # From tuple to list
            x_unwarped, = self.space_x.unwarp(x)
            if x_unwarped in self.history:
                warnings.warn("pySOT proposed the same point twice")
                # Restart from scratch (this clears history and proposals) and
                # produce a completely new batch.
                self.start(self.max_evals)
                return self.suggest(n_suggestions=n_suggestions)

            # NOTE: Append unwarped to avoid rounding issues
            self.history.append(copy(x_unwarped))
            self.proposals.append(proposal)
            x_w.append(copy(x_unwarped))

        return x_w

    def _observe(self, x, y):
        """Complete the pending proposal that produced point `x` with value `y`.

        Raises
        ------
        IndexError
            If `x` does not match any pending point in `self.history`.
        """
        # Find the matching proposal and execute its callbacks
        idx = [x == xx for xx in self.history]
        i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
        proposal = self.proposals[i]
        proposal.record.complete(y)
        # Remove the completed entry from both lists to keep them aligned.
        self.proposals.pop(i)
        self.history.pop(i)

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Non-finite observations are skipped; their points stay pending in
        both `self.history` and `self.proposals`.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        assert len(X) == len(y)

        for x_, y_ in zip(X, y):
            # Just ignore, any inf observations we got, unclear if right thing
            if np.isfinite(y_):
                self._observe(x_, y_)


# Script entry point: when this file is executed directly (not imported), hand
# the optimizer class to bayesmark's experiment_main.
if __name__ == "__main__":
    experiment_main(PySOTOptimizer)


================================================
FILE: example_opt_root/random_optimizer.py
================================================
import bayesmark.random_search as rs
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main


class RandomOptimizer(AbstractOptimizer):
    """Baseline optimizer that delegates every suggestion to `rs.suggest_dict`."""

    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, random=np_util.random):
        """Set up the random search wrapper.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random
            Object forwarded as the `random` argument of `rs.suggest_dict` on
            every call to `suggest`. Defaults to `np_util.random`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

    def suggest(self, n_suggestions=1):
        """Draw new points to evaluate.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # No past data is passed on: both history arguments are empty lists.
        return rs.suggest_dict(
            [],
            [],
            self.api_config,
            n_suggestions=n_suggestions,
            random=self.random,
        )

    def observe(self, X, y):
        """Accept observations; nothing is stored or used.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Random search does not learn from feedback, so this is a no-op.
        return None


# Script entry point: when this file is executed directly (not imported), hand
# the optimizer class to bayesmark's experiment_main.
if __name__ == "__main__":
    experiment_main(RandomOptimizer)


================================================
FILE: example_opt_root/scikit_optimizer.py
================================================
import numpy as np
from scipy.interpolate import interp1d
from skopt import Optimizer as SkOpt
from skopt.space import Categorical, Integer, Real

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main


class ScikitOptimizer(AbstractOptimizer):
    """Benchmark wrapper around the scikit-optimize `Optimizer` ask/tell interface."""

    primary_import = "scikit-optimize"

    def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
            How to estimate the objective function.
        acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
            Acquisition objective to decide next suggestion.
        n_initial_points : int
            Number of points to sample randomly before actual Bayes opt.
        """
        AbstractOptimizer.__init__(self, api_config)

        dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config)

        # Older versions of skopt don't copy over the dimensions names during
        # normalization and hence the names are missing in
        # self.skopt.space.dimensions. Therefore, we save our own copy of
        # dimensions list to be safe. If we can commit to using the newer
        # versions of skopt we can delete self.dimensions_list.
        self.dimensions_list = tuple(dd.name for dd in dimensions)

        self.skopt = SkOpt(
            dimensions,
            n_initial_points=n_initial_points,
            base_estimator=base_estimator,
            acq_func=acq_func,
            acq_optimizer="auto",
            acq_func_kwargs={},
            acq_optimizer_kwargs={},
        )

    @staticmethod
    def get_sk_dimensions(api_config, transform="normalize"):
        """Help routine to setup skopt search space in constructor.

        Take api_config as argument so this can be static.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        transform : str
            Passed as the `transform` argument of the `Integer` and `Real`
            dimensions that are created.

        Returns
        -------
        sk_dims : list
            One skopt dimension per parameter, ordered by sorted parameter
            name.
        round_to_values : dict
            Maps the name of each non-categorical parameter that was given an
            explicit list of values to a nearest-value interpolator over
            those values.
        """
        # The ordering of iteration probably makes no difference, but just to
        # be safe and consistent with space.py, I will make sorted.
        param_list = sorted(api_config.keys())

        sk_dims = []
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for case that whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                # np.unique sorts, so the first/last entries are the min/max
                # and can serve as the range of the skopt dimension.
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values, param_values, kind="nearest", fill_value="extrapolate"
                )

            if param_type == "int":
                # Integer space in sklearn does not support any warping => Need
                # to leave the warping as linear in skopt.
                sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name))
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                sk_dims.append(Integer(0, 1, transform=transform, name=param_name))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                # Leave x-form to one-hot as per skopt default
                sk_dims.append(Categorical(param_values, name=param_name))
            elif param_type == "real":
                # Skopt doesn't support all our warpings, so need to pick
                # closest substitute it does support.
                prior = "log-uniform" if param_space in ("log", "logit") else "uniform"
                sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name))
            else:
                assert False, "type %s not handled in API" % param_type
        return sk_dims, round_to_values

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # First get list of lists from skopt.ask()
        next_guess = self.skopt.ask(n_points=n_suggestions)
        # Then convert to list of dicts
        next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]

        # Now do the rounding, custom rounding is not supported in skopt. Note
        # that there is not necessarily a round function for each dimension
        # here.
        for param_name, round_f in self.round_to_values.items():
            for xx in next_guess:
                xx[param_name] = round_f(xx[param_name])
        return next_guess

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # zip() below would silently drop unmatched trailing entries, so
        # reject mismatched inputs up front.
        assert len(X) == len(y)

        # Supposedly skopt can handle blocks, but not sure about interface for
        # that. Just do loop to be safe for now.
        for xx, yy in zip(X, y):
            # skopt needs lists instead of dicts
            xx = [xx[dim_name] for dim_name in self.dimensions_list]
            # Just ignore, any inf observations we got, unclear if right thing
            if np.isfinite(yy):
                self.skopt.tell(xx, yy)


# Script entry point: when this file is executed directly (not imported), hand
# the optimizer class to bayesmark's experiment_main.
if __name__ == "__main__":
    experiment_main(ScikitOptimizer)


================================================
FILE: integration_test.sh
================================================
#!/bin/bash

# Abort on the first failing command (also inside pipelines) and echo each
# command as it runs.
set -o errexit -o xtrace
set -o pipefail

# Be able to check if using version out of tar ball
which bayesmark-launch
which bayesmark-exp
which bayesmark-agg
which bayesmark-anal

# Where the experiment results are written and the name of the result folder.
DB_ROOT="./notebooks"
DBID="bo_example_folder"

# Round 1: launch, aggregate and analyze with the optimizers named below.
bayesmark-launch -n 15 -r 2 -dir "${DB_ROOT}" -b "${DBID}" \
    -o RandomSearch PySOT OpenTuner-BanditA \
    -c SVM DT \
    -d boston breast \
    -v
bayesmark-agg -dir "${DB_ROOT}" -b "${DBID}"
bayesmark-anal -dir "${DB_ROOT}" -b "${DBID}" -v

# Try ipynb export
python -m ipykernel install --name=bobm_ipynb --user
jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb \
    --ExecutePreprocessor.timeout=-1
jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb \
    --ExecutePreprocessor.timeout=-1

# Try dry run
bayesmark-launch -n 15 -r 3 -dir "${DB_ROOT}" -b "${DBID}" \
    -o RandomSearch PySOT OpenTuner-BanditA \
    -c SVM DT \
    -nj 50 \
    -v

# Try again but use the custom optimizers; the first round's results are
# moved out of the way to ./old beforehand.
mv "${DB_ROOT}/${DBID}" old
bayesmark-launch -n 15 -r 1 -dir "${DB_ROOT}" -b "${DBID}" \
    -o RandomSearch PySOT-New OpenTuner-BanditA-New \
    -c SVM DT \
    --opt-root ./example_opt_root \
    -d boston breast \
    -v
bayesmark-agg -dir "${DB_ROOT}" -b "${DBID}"
bayesmark-anal -dir "${DB_ROOT}" -b "${DBID}" -v

# Export again
jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb \
    --ExecutePreprocessor.timeout=-1
jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb \
    --ExecutePreprocessor.timeout=-1

# Try dry run
bayesmark-launch -n 15 -r 2 -dir "${DB_ROOT}" -b "${DBID}" \
    -o RandomSearch PySOT-New OpenTuner-BanditA-New \
    -c SVM DT \
    --opt-root ./example_opt_root \
    -nj 50 \
    -v

echo "success"


================================================
FILE: integration_test_with_setup.sh
================================================
#!/bin/bash

# Wrapper around integration_test.sh: builds the package, installs it into a
# fresh virtualenv and runs the integration test against that install.

# Abort on the first failing command (also inside pipelines) and echo each
# command as it runs.
set -ex
set -o pipefail

export PIP_REQUIRE_VIRTUALENV=false

# Handy to know what we are working with
git --version
python --version
pip freeze | sort

# Cleanup workspace, src for any old -e installs
git clean -x -f -d
rm -rf src/

# See if opentuner will work in env (but this command does not work on Mac)
# dpkg -l | grep libsqlite

# Simulate deployment with wheel
./build_wheel.sh
# Give the built archive a fixed name so the install command below does not
# depend on the version string.
mv -v dist/bayesmark-* dist/bayesmark.tar.gz

# Install and run local optimizers
mkdir install_test
cp -r ./notebooks install_test
cp -r ./example_opt_root install_test

cd install_test
virtualenv bobm_ipynb --python=python3
source ./bobm_ipynb/bin/activate
python --version
pip freeze | sort

# Remove this if we want to make sure everything is compatible with latest
# pip install -r ../requirements/optimizers.txt

# The extras spec is quoted: unquoted, "[optimizers,notebooks]" is a shell
# glob bracket expression and could be expanded if a matching file exists.
pip install "../dist/bayesmark.tar.gz[optimizers,notebooks]"
../integration_test.sh

# wrap up
deactivate
cd ..

echo "success with setup wrapper too"


================================================
FILE: notebooks/dummy.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ipykernel
import jupyter
import jupyter_core
import nbconvert

# Reference every notebook-only dependency imported above so that pipreqs and
# flake8 treat the imports as used; each module's name and version is printed.
for pkg in (ipykernel, jupyter, jupyter_core, nbconvert):
    print("%s %s" % (pkg.__name__, pkg.__version__))


================================================
FILE: notebooks/plot_mean_score.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import cm, colors, rcParams\n",
    "\n",
    "import bayesmark.constants as cc\n",
    "import bayesmark.xr_util as xru\n",
    "from bayesmark.serialize import XRSerializer\n",
    "from bayesmark.constants import ITER, METHOD, ARG_DELIM, OBJECTIVE, VISIBLE_TO_OPT\n",
    "from bayesmark.path_util import abspath\n",
    "from bayesmark.util import preimage_func"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User settings, must specify location of the data to make plots here for this to run\n",
    "DB_ROOT = abspath(\".\")\n",
    "DBID = \"bo_example_folder\"\n",
    "metric_for_scoring = VISIBLE_TO_OPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matplotlib setup\n",
    "# Note this will put type-3 font BS in the pdfs, if it matters\n",
    "rcParams[\"mathtext.fontset\"] = \"stix\"\n",
    "rcParams[\"font.family\"] = \"STIXGeneral\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_color_dict(names):\n",
    "    \"\"\"Make a color dictionary to give each name a mpl color.\n",
    "    \"\"\"\n",
    "    norm = colors.Normalize(vmin=0, vmax=1)\n",
    "    m = cm.ScalarMappable(norm, cm.tab20)\n",
    "    color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\n",
    "    color_dict = dict(zip(names, color_dict))\n",
    "    return color_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the data\n",
    "summary_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.MEAN_SCORE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "method_to_rgba = build_color_dict(summary_ds.coords[METHOD].values.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group methods by the package behind them\n",
    "method_only = lambda method_rev: method_rev.split(ARG_DELIM, 1)[0]\n",
    "groups = preimage_func(method_only, summary_ds.coords[METHOD].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make a plot for each package\n",
    "for method_name in groups:\n",
    "    plt.figure(figsize=(5, 5), dpi=300)\n",
    "    for method_ver_name in groups[method_name]:\n",
    "        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "        curr_ds.coords[ITER].values\n",
    "\n",
    "        plt.fill_between(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.LB_MED].values,\n",
    "            curr_ds[cc.UB_MED].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            alpha=0.5,\n",
    "        )\n",
    "        plt.plot(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.PERF_MED].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            label=method_name,\n",
    "            marker=\".\",\n",
    "        )\n",
    "    plt.xlabel(\"evaluation\", fontsize=10)\n",
    "    plt.ylabel(\"normalized median score\", fontsize=10)\n",
    "    plt.title(method_name)\n",
    "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "    plt.grid()\n",
    "\n",
    "    plt.figure(figsize=(5, 5), dpi=300)\n",
    "    for method_ver_name in groups[method_name]:\n",
    "        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "        curr_ds.coords[ITER].values\n",
    "\n",
    "        plt.fill_between(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.LB_MEAN].values,\n",
    "            curr_ds[cc.UB_MEAN].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            alpha=0.5,\n",
    "        )\n",
    "        plt.plot(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.PERF_MEAN].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            label=method_name,\n",
    "            marker=\".\",\n",
    "        )\n",
    "    plt.xlabel(\"evaluation\", fontsize=10)\n",
    "    plt.ylabel(\"mean score\", fontsize=10)\n",
    "    plt.title(method_name)\n",
    "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "    plt.grid()\n",
    "\n",
    "    plt.figure(figsize=(5, 5), dpi=300)\n",
    "    for method_ver_name in groups[method_name]:\n",
    "        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "        curr_ds.coords[ITER].values\n",
    "\n",
    "        plt.fill_between(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.LB_NORMED_MEAN].values,\n",
    "            curr_ds[cc.UB_NORMED_MEAN].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            alpha=0.5,\n",
    "        )\n",
    "        plt.plot(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.NORMED_MEAN].values,\n",
    "            color=method_to_rgba[method_ver_name],\n",
    "            label=method_name,\n",
    "            marker=\".\",\n",
    "        )\n",
    "    plt.xlabel(\"evaluation\", fontsize=10)\n",
    "    plt.ylabel(\"normalized mean score\", fontsize=10)\n",
    "    plt.title(method_name)\n",
    "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "    plt.grid()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the summary plot\n",
    "plt.figure(figsize=(5, 5), dpi=300)\n",
    "for method_ver_name in summary_ds.coords[METHOD].values:\n",
    "    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "    curr_ds.coords[ITER].values\n",
    "\n",
    "    plt.fill_between(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.LB_MED].values,\n",
    "        curr_ds[cc.UB_MED].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        alpha=0.5,\n",
    "    )\n",
    "    plt.plot(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.PERF_MED].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        label=method_ver_name,\n",
    "        marker=\".\",\n",
    "    )\n",
    "plt.xlabel(\"evaluation\", fontsize=10)\n",
    "plt.ylabel(\"normalized median score\", fontsize=10)\n",
    "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "plt.grid()\n",
    "\n",
    "plt.figure(figsize=(5, 5), dpi=300)\n",
    "for method_ver_name in summary_ds.coords[METHOD].values:\n",
    "    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "    curr_ds.coords[ITER].values\n",
    "\n",
    "    plt.fill_between(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.LB_MEAN].values,\n",
    "        curr_ds[cc.UB_MEAN].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        alpha=0.5,\n",
    "    )\n",
    "    plt.plot(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.PERF_MEAN].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        label=method_ver_name,\n",
    "        marker=\".\",\n",
    "    )\n",
    "plt.xlabel(\"evaluation\", fontsize=10)\n",
    "plt.ylabel(\"mean score\", fontsize=10)\n",
    "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "plt.grid()\n",
    "\n",
    "plt.figure(figsize=(5, 5), dpi=300)\n",
    "for method_ver_name in summary_ds.coords[METHOD].values:\n",
    "    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n",
    "    curr_ds.coords[ITER].values\n",
    "\n",
    "    plt.fill_between(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.LB_NORMED_MEAN].values,\n",
    "        curr_ds[cc.UB_NORMED_MEAN].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        alpha=0.5,\n",
    "    )\n",
    "    plt.plot(\n",
    "        curr_ds.coords[ITER].values,\n",
    "        curr_ds[cc.NORMED_MEAN].values,\n",
    "        color=method_to_rgba[method_ver_name],\n",
    "        label=method_ver_name,\n",
    "        marker=\".\",\n",
    "    )\n",
    "plt.xlabel(\"evaluation\", fontsize=10)\n",
    "plt.ylabel(\"normalized mean score\", fontsize=10)\n",
    "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "plt.grid()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "bobm_ipynb",
   "language": "python",
   "name": "bobm_ipynb"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: notebooks/plot_test_case.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import cm, colors, rcParams\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "import bayesmark.constants as cc\n",
    "from bayesmark.path_util import abspath\n",
    "from bayesmark.serialize import XRSerializer\n",
    "from bayesmark.constants import ITER, METHOD, TEST_CASE, OBJECTIVE, VISIBLE_TO_OPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# User settings, must specify location of the data to make plots here for this to run\n",
    "DB_ROOT = abspath(\".\")\n",
    "DBID = \"bo_example_folder\"\n",
    "metric_for_scoring = VISIBLE_TO_OPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matplotlib setup\n",
    "# Note this will put type-3 font BS in the pdfs, if it matters\n",
    "rcParams[\"mathtext.fontset\"] = \"stix\"\n",
    "rcParams[\"font.family\"] = \"STIXGeneral\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_color_dict(names):\n",
    "    \"\"\"Make a color dictionary to give each name a mpl color.\n",
    "    \"\"\"\n",
    "    norm = colors.Normalize(vmin=0, vmax=1)\n",
    "    m = cm.ScalarMappable(norm, cm.tab20)\n",
    "    color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\n",
    "    color_dict = dict(zip(names, color_dict))\n",
    "    return color_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the data\n",
    "agg_results_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.PERF_RESULTS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup for plotting\n",
    "method_list = agg_results_ds.coords[METHOD].values\n",
    "method_to_rgba = build_color_dict(method_list.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the plots for individual test functions\n",
    "for func_name in agg_results_ds.coords[TEST_CASE].values:\n",
    "    plt.figure(figsize=(5, 5), dpi=300)\n",
    "    for method_name in method_list:\n",
    "        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n",
    "\n",
    "        plt.fill_between(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.LB_MED].values,\n",
    "            curr_ds[cc.UB_MED].values,\n",
    "            color=method_to_rgba[method_name],\n",
    "            alpha=0.5,\n",
    "        )\n",
    "        plt.plot(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.PERF_MED].values,\n",
    "            color=method_to_rgba[method_name],\n",
    "            label=method_name,\n",
    "            marker=\".\",\n",
    "        )\n",
    "    plt.xlabel(\"evaluation\", fontsize=10)\n",
    "    plt.ylabel(\"median score\", fontsize=10)\n",
    "    plt.title(func_name)\n",
    "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "    plt.grid()\n",
    "\n",
    "    plt.figure(figsize=(5, 5), dpi=300)\n",
    "    for method_name in method_list:\n",
    "        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n",
    "\n",
    "        plt.fill_between(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.LB_MEAN].values,\n",
    "            curr_ds[cc.UB_MEAN].values,\n",
    "            color=method_to_rgba[method_name],\n",
    "            alpha=0.5,\n",
    "        )\n",
    "        plt.plot(\n",
    "            curr_ds.coords[ITER].values,\n",
    "            curr_ds[cc.PERF_MEAN].values,\n",
    "            color=method_to_rgba[method_name],\n",
    "            label=method_name,\n",
    "            marker=\".\",\n",
    "        )\n",
    "    plt.xlabel(\"evaluation\", fontsize=10)\n",
    "    plt.ylabel(\"mean score\", fontsize=10)\n",
    "    plt.title(func_name)\n",
    "    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n",
    "    plt.grid()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "bobm_ipynb",
   "language": "python",
   "name": "bobm_ipynb"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: requirements/base.in
================================================
scipy==1.2.0
pandas==0.24.0
pathvalidate==0.29.0
numpy==1.16.1
GitPython==2.1.11
importlib-metadata==0.18
scikit-learn==0.20.2
xarray==0.12.2


================================================
FILE: requirements/base.txt
================================================
# SHA1:7ebe4df9e60f001b676e74ae561d5dc3202c3dd0
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
gitdb2==2.0.5             # via gitpython
gitpython==2.1.11         # via -r requirements/base.in
importlib-metadata==0.18  # via -r requirements/base.in
numpy==1.16.1             # via -r requirements/base.in, pandas, scikit-learn, scipy, xarray
pandas==0.24.0            # via -r requirements/base.in, xarray
pathvalidate==0.29.0      # via -r requirements/base.in
python-dateutil==2.8.0    # via pandas
pytz==2019.1              # via pandas
scikit-learn==0.20.2      # via -r requirements/base.in
scipy==1.2.0              # via -r requirements/base.in, scikit-learn
six==1.12.0               # via python-dateutil
smmap2==2.0.5             # via gitdb2
xarray==0.12.2            # via -r requirements/base.in
zipp==0.5.2               # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools


================================================
FILE: requirements/docs.in
================================================
-r base.in
Sphinx==2.1.2


================================================
FILE: requirements/docs.txt
================================================
# SHA1:cde26afc07f6c9c1c6cb169e125fc5142a0c59ae
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
-r base.txt
alabaster==0.7.12         # via sphinx
attrs==19.1.0             # via packaging
babel==2.7.0              # via sphinx
certifi==2019.6.16        # via requests
chardet==3.0.4            # via requests
docutils==0.15            # via sphinx
idna==2.8                 # via requests
imagesize==1.1.0          # via sphinx
jinja2==2.10.1            # via sphinx
markupsafe==1.1.1         # via jinja2
packaging==19.1           # via sphinx
pygments==2.4.2           # via sphinx
pyparsing==2.4.2          # via packaging
requests==2.22.0          # via sphinx
snowballstemmer==1.9.0    # via sphinx
sphinx==2.1.2             # via -r requirements/docs.in
sphinxcontrib-applehelp==1.0.1  # via sphinx
sphinxcontrib-devhelp==1.0.1  # via sphinx
sphinxcontrib-htmlhelp==1.0.2  # via sphinx
sphinxcontrib-jsmath==1.0.1  # via sphinx
sphinxcontrib-qthelp==1.0.2  # via sphinx
sphinxcontrib-serializinghtml==1.1.3  # via sphinx
urllib3==1.25.3           # via requests

# The following packages are considered to be unsafe in a requirements file:
# setuptools


================================================
FILE: requirements/ipynb.in
================================================
-r base.in
ipykernel==5.1.1
nbconvert==5.6.0
jupyter==1.0.0
jupyter-core==4.6.0
matplotlib==3.1.1
numpy==1.16.1


================================================
FILE: requirements/ipynb.txt
================================================
# SHA1:6c16d140e48d7e7fa0e157c053953db7d76f0caf
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
-r base.txt
appnope==0.1.0            # via ipython
attrs==19.1.0             # via jsonschema
backcall==0.1.0           # via ipython
bleach==3.1.0             # via nbconvert
cycler==0.10.0            # via matplotlib
decorator==4.4.0          # via ipython, traitlets
defusedxml==0.6.0         # via nbconvert
entrypoints==0.3          # via nbconvert
ipykernel==5.1.1          # via -r requirements/ipynb.in, ipywidgets, jupyter, jupyter-console, notebook, qtconsole
ipython-genutils==0.2.0   # via nbformat, notebook, qtconsole, traitlets
ipython==7.6.1            # via ipykernel, ipywidgets, jupyter-console
ipywidgets==7.5.1         # via jupyter
jedi==0.14.1              # via ipython
jinja2==2.10.1            # via nbconvert, notebook
jsonschema==3.0.2         # via nbformat
jupyter-client==5.3.1     # via ipykernel, jupyter-console, notebook, qtconsole
jupyter-console==6.0.0    # via jupyter
jupyter-core==4.6.0       # via -r requirements/ipynb.in, jupyter-client, nbconvert, nbformat, notebook, qtconsole
jupyter==1.0.0            # via -r requirements/ipynb.in
kiwisolver==1.1.0         # via matplotlib
markupsafe==1.1.1         # via jinja2
matplotlib==3.1.1         # via -r requirements/ipynb.in
mistune==0.8.4            # via nbconvert
nbconvert==5.6.0          # via -r requirements/ipynb.in, jupyter, notebook
nbformat==4.4.0           # via ipywidgets, nbconvert, notebook
notebook==6.0.1           # via jupyter, widgetsnbextension
pandocfilters==1.4.2      # via nbconvert
parso==0.5.1              # via jedi
pexpect==4.7.0            # via ipython
pickleshare==0.7.5        # via ipython
prometheus-client==0.7.1  # via notebook
prompt-toolkit==2.0.9     # via ipython, jupyter-console
ptyprocess==0.6.0         # via pexpect, terminado
pygments==2.4.2           # via ipython, jupyter-console, nbconvert, qtconsole
pyparsing==2.4.2          # via matplotlib
pyrsistent==0.15.4        # via jsonschema
pyzmq==18.0.2             # via jupyter-client, notebook
qtconsole==4.5.5          # via jupyter
send2trash==1.5.0         # via notebook
terminado==0.8.2          # via notebook
testpath==0.4.2           # via nbconvert
tornado==6.0.3            # via ipykernel, jupyter-client, notebook, terminado
traitlets==4.3.2          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook, qtconsole
wcwidth==0.1.7            # via prompt-toolkit
webencodings==0.5.1       # via bleach
widgetsnbextension==3.5.1  # via ipywidgets

# The following packages are considered to be unsafe in a requirements file:
# setuptools


================================================
FILE: requirements/optimizers.in
================================================
-r base.in
opentuner==0.8.2
numpy==1.16.1
scipy==1.2.0
nevergrad==0.1.4
hyperopt==0.1.1
POAP==0.1.26
scikit-optimize==0.5.2
pySOT==0.3.3


================================================
FILE: requirements/optimizers.txt
================================================
# SHA1:08174a35f9973427450f549131b4438e2f116a88
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
-r base.txt
argparse==1.4.0           # via opentuner
atomicwrites==1.3.0       # via pytest
attrs==19.1.0             # via packaging, pytest
bayesian-optimization==0.6.0  # via nevergrad
certifi==2019.6.16        # via requests
chardet==3.0.4            # via requests
cma==2.7.0                # via nevergrad
coverage==4.5.4           # via nevergrad
cycler==0.10.0            # via matplotlib
decorator==4.4.0          # via networkx
dill==0.3.0               # via pysot
fn==0.4.3                 # via opentuner
future==0.17.1            # via hyperopt, opentuner
genty==1.3.2              # via nevergrad
hyperopt==0.1.1           # via -r requirements/optimizers.in
idna==2.8                 # via requests
joblib==0.13.2            # via nevergrad
kiwisolver==1.1.0         # via matplotlib
matplotlib==3.1.1         # via nevergrad
more-itertools==7.2.0     # via pytest
mypy-extensions==0.4.1    # via mypy
mypy==0.720               # via nevergrad
networkx==2.3             # via hyperopt
nevergrad==0.1.4          # via -r requirements/optimizers.in
nose-timer==0.7.5         # via nevergrad
nose==1.3.7               # via nevergrad, nose-timer
opentuner==0.8.2          # via -r requirements/optimizers.in
packaging==19.1           # via pytest
pluggy==0.12.0            # via pytest
poap==0.1.26              # via -r requirements/optimizers.in, pysot
py==1.8.0                 # via pytest
pydoe2==1.2.0             # via pysot
pymongo==3.8.0            # via hyperopt
pyparsing==2.4.2          # via matplotlib, packaging
pysot==0.3.3              # via -r requirements/optimizers.in
pytest==5.0.1             # via pysot
requests==2.22.0          # via nevergrad
scikit-optimize==0.5.2    # via -r requirements/optimizers.in
sqlalchemy==1.3.8         # via opentuner
typed-ast==1.4.0          # via mypy
typing-extensions==3.7.4  # via mypy, nevergrad
urllib3==1.25.3           # via requests
wcwidth==0.1.7            # via pytest
xlrd==1.2.0               # via nevergrad
xlwt==1.3.0               # via nevergrad

# The following packages are considered to be unsafe in a requirements file:
# setuptools


================================================
FILE: requirements/pipreqs_edits.sed
================================================
/argparse/d
/appnope/d
/certifi/d
/bayesmark/d


================================================
FILE: requirements/self.txt
================================================
bayesmark==0.0.8


================================================
FILE: requirements/test.in
================================================
-r base.in
-r optimizers.in
hypothesis==4.32.3
hypothesis-gufunc==0.0.5rc2
numpy==1.16.1
pathvalidate==0.29.0
scipy==1.2.0
scikit-learn==0.20.2
xarray==0.12.2
pytest==5.0.1
pytest-cov==2.7.1


================================================
FILE: requirements/test.txt
================================================
# SHA1:0dd8b5c26e6671e320706ddd399f6f62e19f3189
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
-r base.txt
-r optimizers.txt
hypothesis-gufunc==0.0.5rc2  # via -r requirements/test.in
hypothesis==4.32.3        # via -r requirements/test.in, hypothesis-gufunc
pytest-cov==2.7.1         # via -r requirements/test.in

# The following packages are considered to be unsafe in a requirements file:
# setuptools


================================================
FILE: requirements/tools.in
================================================
detect-secrets==0.12.5
ipykernel==5.1.1
nbconvert==5.6.0
pip-compile-multi==1.4.0
pipreqs==0.4.9
pre-commit==1.15.2
pytest==5.0.1


================================================
FILE: requirements/tools.txt
================================================
# SHA1:08f4ed4790290aab315dd20169793be4f0a974af
#
# This file is autogenerated by pip-compile-multi
# To update, run:
#
#    pip-compile-multi
#
appnope==0.1.0            # via ipython
aspy.yaml==1.3.0          # via pre-commit
atomicwrites==1.3.0       # via pytest
attrs==19.1.0             # via jsonschema, packaging, pytest
backcall==0.1.0           # via ipython
bleach==3.1.0             # via nbconvert
certifi==2019.6.16        # via requests
cfgv==2.0.1               # via pre-commit
chardet==3.0.4            # via requests
click==7.0                # via pip-compile-multi, pip-tools
decorator==4.4.0          # via ipython, traitlets
defusedxml==0.6.0         # via nbconvert
detect-secrets==0.12.5    # via -r requirements/tools.in
docopt==0.6.2             # via pipreqs
entrypoints==0.3          # via nbconvert
identify==1.4.5           # via pre-commit
idna==2.8                 # via requests
importlib-metadata==0.18  # via importlib-resources, pluggy, pre-commit, pytest
importlib-resources==2.0.1  # via pre-commit
ipykernel==5.1.1          # via -r requirements/tools.in
ipython-genutils==0.2.0   # via nbformat, traitlets
ipython==7.6.1            # via ipykernel
jedi==0.14.1              # via ipython
jinja2==2.10.1            # via nbconvert
jsonschema==3.0.2         # via nbformat
jupyter-client==5.3.1     # via ipykernel
jupyter-core==4.6.0       # via jupyter-client, nbconvert, nbformat
markupsafe==1.1.1         # via jinja2
mistune==0.8.4            # via nbconvert
more-itertools==7.2.0     # via pytest
nbconvert==5.6.0          # via -r requirements/tools.in
nbformat==4.4.0           # via nbconvert
nodeenv==1.3.3            # via pre-commit
packaging==19.1           # via pytest
pandocfilters==1.4.2      # via nbconvert
parso==0.5.1              # via jedi
pexpect==4.7.0            # via ipython
pickleshare==0.7.5        # via ipython
pip-compile-multi==1.4.0  # via -r requirements/tools.in
pip-tools==5.0.0          # via pip-compile-multi
pipreqs==0.4.9            # via -r requirements/tools.in
pluggy==0.12.0            # via pytest
pre-commit==1.15.2        # via -r requirements/tools.in
prompt-toolkit==2.0.9     # via ipython
ptyprocess==0.6.0         # via pexpect
py==1.8.0                 # via pytest
pygments==2.4.2           # via ipython, nbconvert
pyparsing==2.4.2          # via packaging
pyrsistent==0.15.4        # via jsonschema
pytest==5.0.1             # via -r requirements/tools.in
python-dateutil==2.8.0    # via jupyter-client
pyyaml==5.1.1             # via aspy.yaml, detect-secrets, pre-commit
pyzmq==18.0.2             # via jupyter-client
requests==2.22.0          # via detect-secrets, yarg
six==1.12.0               # via bleach, cfgv, jsonschema, packaging, pip-tools, pre-commit, prompt-toolkit, python-dateutil, traitlets
testpath==0.4.2           # via nbconvert
toml==0.10.0              # via pre-commit
toposort==1.5             # via pip-compile-multi
tornado==6.0.3            # via ipykernel, jupyter-client
traitlets==4.3.2          # via ipykernel, ipython, jupyter-client, jupyter-core, nbconvert, nbformat
urllib3==1.25.3           # via requests
virtualenv==16.7.2        # via pre-commit
wcwidth==0.1.7            # via prompt-toolkit, pytest
webencodings==0.5.1       # via bleach
yarg==0.1.9               # via pipreqs
zipp==0.5.2               # via importlib-metadata, importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools


================================================
FILE: setup.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import find_packages, setup

# Common prefix of every console script registered in the `setup()` call of this file
CMD_NAME = "bayesmark"

# Strings to remove from README to make it PyPI friendly. See:
# https://packaging.python.org/guides/making-a-pypi-friendly-readme/#validating-restructuredtext-markup
REMOVE_FROM_RST = (":func:", ":ref:")


def read_requirements(name):
    """Load the first-order requirements listed in ``requirements/<name>.in``.

    Strict ``==`` pins are loosened to ``>=``. Only lines that start with an alphanumeric character are kept,
    which drops ``-r`` includes, ``#`` comments, and blank lines.

    Parameters
    ----------
    name : str
        Base name of the requirements file, e.g., ``"base"`` for ``requirements/base.in``. The path is relative
        to the current working directory.

    Returns
    -------
    requirements : list(str)
        Requirement specifiers in file order with every ``==`` replaced by ``>=``.
    """
    with open("requirements/" + name + ".in") as f:
        requirements = f.read().strip()
    requirements = requirements.replace("==", ">=").splitlines()  # Loosen strict pins
    # Slice with `[:1]` instead of indexing with `[0]`: `strip()` only trims the ends of the file, so a blank line
    # in the middle is an empty string here and `pp[0]` would raise `IndexError`. `""[:1].isalnum()` is `False`.
    return [pp for pp in requirements if pp[:1].isalnum()]


# Derive install requires from base.in first order requirements
requirements = read_requirements("base")
# Requirement lists for the optional extras declared in `extras_require` below
opt_requirements = read_requirements("optimizers")
ipynb_requirements = read_requirements("ipynb")

# The README (minus Sphinx-only roles) is used as the long description of the package
with open("README.rst") as f:
    long_description = f.read()
# Probably more efficient way to do this with regex but good enough
for remove_word in REMOVE_FROM_RST:
    long_description = long_description.replace(remove_word, "")

setup(
    name="bayesmark",
    version="0.0.8",
    packages=find_packages(),
    url="https://github.com/uber/bayesmark/",
    author="Ryan Turner",
    author_email=("rdturnermtl@github.com"),
    license="Apache v2",
    description="Bayesian optimization benchmark system",
    install_requires=requirements,
    # Optional dependency groups, selectable as `bayesmark[optimizers]` and `bayesmark[notebooks]`
    extras_require={"optimizers": opt_requirements, "notebooks": ipynb_requirements},
    long_description=long_description,
    long_description_content_type="text/x-rst",
    platforms=["any"],
    # Each console script is named `<CMD_NAME>-<suffix>` and maps to the `main` function of one module
    entry_points={
        "console_scripts": [
            CMD_NAME + "-init = bayesmark.experiment_db_init:main",
            CMD_NAME + "-launch = bayesmark.experiment_launcher:main",
            CMD_NAME + "-agg = bayesmark.experiment_aggregate:main",
            CMD_NAME + "-baseline = bayesmark.experiment_baseline:main",
            CMD_NAME + "-anal = bayesmark.experiment_analysis:main",
            CMD_NAME + "-exp = bayesmark.experiment:main",
        ]
    },
)


================================================
FILE: test/data_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hypothesis import given
from hypothesis.strategies import from_regex, sampled_from

from bayesmark import data

# Sorted so the list handed to `sampled_from` has a deterministic order
DATA_NAMES = sorted(data.DATA_LOADERS.keys())


@given(sampled_from(DATA_NAMES) | from_regex("^reg-[A-Z]*") | from_regex("^clf-[A-Z]*"))
def test_get_problem_type(dataset_name):
    """A problem type is returned for names in `DATA_NAMES` and for names matching the ``reg-``/``clf-`` patterns."""
    assert data.get_problem_type(dataset_name) is not None


================================================
FILE: test/dummy.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import pytest_cov

# Import the extra deps and actually use them (print name and version of each) to keep pipreqs and flake8 happy
for pkg in (pytest, pytest_cov):
    print("%s %s" % (pkg.__name__, pkg.__version__))


================================================
FILE: test/expected_max_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hypothesis import given
from hypothesis.strategies import floats, integers, lists

import bayesmark.expected_max as em


@given(integers(1, 10), integers(1, 10))
def test_get_expected_max_weights(n, m):
    """Smoke test: a result is returned for every ``n`` and ``m`` in ``[1, 10]``."""
    assert em.get_expected_max_weights(n, m) is not None


@given(lists(floats()), integers(1, 10))
def test_expected_max(x, m):
    """Smoke test: `expected_max` returns something for arbitrary lists of floats and ``m`` in ``[1, 10]``."""
    assert em.expected_max(x, m) is not None


@given(lists(floats()), integers(1, 10))
def test_expected_min(x, m):
    """Smoke test: `expected_min` returns something for arbitrary lists of floats and ``m`` in ``[1, 10]``."""
    assert em.expected_min(x, m) is not None


================================================
FILE: test/experiment_aggregate_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from itertools import product

import numpy as np
from hypothesis import HealthCheck, given, settings
from hypothesis.strategies import floats
from hypothesis_gufunc.extra.xr import (
    fixed_datasets,
    simple_coords,
    simple_dataarrays,
    simple_datasets,
    xr_coords,
    xr_dims,
)

import bayesmark.experiment_aggregate as agg
from bayesmark.constants import EVAL_PHASE, ITER, METHOD, OBS_PHASE, SUGGEST, SUGGEST_PHASE, TEST_CASE, TRIAL
from bayesmark.experiment import OBJECTIVE_NAMES
from bayesmark.signatures import N_SUGGESTIONS

# Fixed length of the `SIG_POINT` dimension in the generated data sets (see `data_to_concat`)
N_SIG = N_SUGGESTIONS
# Name of the dimension that indexes the entries of the "sig" variable
SIG_POINT = "sig_point"


def data_to_concat():
    """Build a strategy generating input for `agg.concat_experiments`.

    One combined data set spanning all (test case, method, trial) combinations is generated and then split by
    `separate` into one entry per combination.

    Returns
    -------
    S : strategy
        Strategy generating lists of ``(meta_data, data)`` pairs where ``meta_data`` is
        ``(test_case, method, trial)`` and ``data`` is ``(perf_ds, time_ds, suggest_ds, sig)``.
    """

    def separate(ds):
        """Split the combined data set `ds` into a list of per-experiment ``(meta_data, data)`` pairs."""
        G = product(
            ds.coords[TEST_CASE].values.tolist(), ds.coords[METHOD].values.tolist(), ds.coords[TRIAL].values.tolist()
        )

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            ds_sub = ds.sel({TEST_CASE: test_case, METHOD: method, TRIAL: trial}, drop=True)

            perf_ds = ds_sub[list(OBJECTIVE_NAMES)]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            suggest_ds = ds_sub[["foo", "bar", "baz"]]
            sig = ds_sub["sig"].values.tolist()
            data = (perf_ds, time_ds, suggest_ds, sig)
            L.append((meta_data, data))
            # Sanity check on the generated data: the element strategy below excludes NaN
            assert not any(np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)
            assert not any(np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not any(np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)
            assert not np.any(np.isnan(sig))
        return L

    # `np.float64` is used instead of the `np.float_` alias, which no longer exists in NumPy >= 2.0
    vars_to_dims = {
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }
    dtype = {SUGGEST_PHASE: np.float64, EVAL_PHASE: np.float64, OBS_PHASE: np.float64, "sig": np.float64}

    for obj in OBJECTIVE_NAMES:
        vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[obj] = np.float64

    # We should also generate this using the space strategy, but hard coding this test case is good enough for now.
    input_vars = {"foo": np.float64, "bar": np.float64, "baz": np.int_}
    for vv, dd in input_vars.items():
        vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[vv] = dd

    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)
    # Using only str following dim conventions for coords here
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }
    S = fixed_datasets(vars_to_dims, dtype=dtype, elements=float_no_nan, coords_st=coords_st, min_side=1).map(separate)
    return S


def time_datasets():
    """Build a strategy generating timing data sets to feed `agg.summarize_time`.

    Returns
    -------
    S : strategy
        Strategy for data sets with the variables `SUGGEST_PHASE` and `OBS_PHASE` over ``(ITER,)`` and `EVAL_PHASE`
        over ``(ITER, SUGGEST)``. All elements are finite, non-negative floats.
    """
    vars_to_dims = {SUGGEST_PHASE: (ITER,), EVAL_PHASE: (ITER, SUGGEST), OBS_PHASE: (ITER,)}
    # `np.float64` is used instead of the `np.float_` alias, which no longer exists in NumPy >= 2.0
    dtype = {SUGGEST_PHASE: np.float64, EVAL_PHASE: np.float64, OBS_PHASE: np.float64}
    elements = floats(min_value=0, allow_infinity=False, allow_nan=False)
    S = simple_datasets(vars_to_dims, dtype=dtype, elements=elements, min_side=1)
    return S


def perf_dataarrays():
    """Build a strategy generating float data arrays with dimensions ``(ITER, SUGGEST)`` and no NaN elements.

    Returns
    -------
    S : strategy
        Strategy built with `simple_dataarrays`.
    """
    dims = (ITER, SUGGEST)
    elements = floats(allow_nan=False)
    # `np.float64` is used instead of the `np.float_` alias, which no longer exists in NumPy >= 2.0
    S = simple_dataarrays(dims, dtype=np.float64, elements=elements)
    return S


@given(time_datasets())
def test_summarize_time(all_time):
    """Smoke test: `summarize_time` returns something for every generated timing data set."""
    assert agg.summarize_time(all_time) is not None


@given(data_to_concat())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_concat_experiments(all_experiments):
    """Smoke test: `concat_experiments` runs on generated experiments and yields exactly four results."""
    experiments = list(all_experiments)
    results = agg.concat_experiments(experiments, ravel=False)
    # The unpacking is the only check here: it fails unless exactly four items come back
    all_perf, all_time, all_suggest, all_sigs = results


================================================
FILE: test/experiment_analysis_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hypothesis import given, settings

import bayesmark.experiment_baseline as base
from bayesmark import experiment_analysis as anal
from bayesmark.constants import TRIAL
from bayesmark.np_util import argmin_2d
from hypothesis_util import gufunc_floats
from util import perf_dataarrays


@given(gufunc_floats("(n,p,t),(n,p,t)->(n,t)", allow_nan=False, unique=True, min_side=1))
def test_get_perf_array(args):
    """Compare `get_perf_array` with a brute-force lookup of the best visible evaluation so far.

    Behavior for tie-breaking in `evals_visible` is complex, so only testing all unique case here.
    """
    evals, evals_visible = args
    n_iter, _, n_trials = evals.shape

    perf_array = anal.get_perf_array(evals, evals_visible)
    assert perf_array.shape == (n_iter, n_trials)

    for n_seen in range(1, n_iter + 1):
        # All visible evaluations of the first `n_seen` iterations
        seen = evals_visible[:n_seen]
        for trial in range(n_trials):
            best_iter, best_pt = argmin_2d(seen[:, :, trial])
            assert perf_array[n_seen - 1, trial] == evals[best_iter, best_pt, trial]


@given(perf_dataarrays(min_trial=2))
@settings(deadline=None)
def test_compute_aggregates(perf_da):
    """Smoke test: build the baseline from the first half of the trials and aggregate the second half."""
    half = perf_da.sizes[TRIAL] // 2
    assert isinstance(half, int)

    baseline_part = perf_da.isel({TRIAL: slice(None, half)})
    assert baseline_part.sizes[TRIAL] >= 1

    eval_part = perf_da.isel({TRIAL: slice(half, None)})
    n_eval = eval_part.sizes[TRIAL]
    assert n_eval >= 1
    # Relabel the trials of the second half so they start from zero again
    eval_part.coords[TRIAL] = list(range(n_eval))

    baseline_ds = base.compute_baseline(baseline_part)
    anal.compute_aggregates(eval_part, baseline_ds)


@given(perf_dataarrays(min_trial=4))
@settings(deadline=None)
def test_compute_aggregates_with_aux(perf_da):
    """Smoke test for `compute_aggregates` when `visible_perf_da` is passed as well."""
    # First cut: the front half of the trials gives the baseline, the back half is kept for the next cut
    half = perf_da.sizes[TRIAL] // 2
    assert isinstance(half, int)
    front = perf_da.isel({TRIAL: slice(None, half)})
    assert front.sizes[TRIAL] >= 1
    remainder = perf_da.isel({TRIAL: slice(half, None)})
    assert remainder.sizes[TRIAL] >= 1
    remainder.coords[TRIAL] = list(range(remainder.sizes[TRIAL]))
    baseline_ds = base.compute_baseline(front)

    # Second cut: two equally sized pieces of the remainder, one passed as visible and one as the main input
    half = remainder.sizes[TRIAL] // 2
    assert isinstance(half, int)
    visible = remainder.isel({TRIAL: slice(None, half)})
    assert visible.sizes[TRIAL] >= 1
    scored = remainder.isel({TRIAL: slice(half, 2 * half)})
    assert scored.sizes[TRIAL] >= 1
    scored.coords[TRIAL] = list(range(scored.sizes[TRIAL]))

    anal.compute_aggregates(scored, baseline_ds, visible_perf_da=visible)


================================================
FILE: test/experiment_baseline_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

from hypothesis import given, settings

import bayesmark.experiment_baseline as base
from util import perf_dataarrays


@given(perf_dataarrays())
@settings(deadline=None)
def test_compute_baseline(perf_da):
    """Smoke test: `compute_baseline` returns something; any `RuntimeWarning` it emits is silenced."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        result = base.compute_baseline(perf_da)
    assert result is not None


================================================
FILE: test/experiment_db_init_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bayesmark.experiment_db_init as dbi


def test_main():
    """Placeholder test: only checks that the module-level ``EXIST_OK`` value is truthy."""
    # Really a nop test since there is nothing to test in this func
    assert dbi.EXIST_OK


================================================
FILE: test/experiment_launcher_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
from io import StringIO
from string import ascii_letters, digits

import numpy as np
from hypothesis import HealthCheck, assume, given, settings
from hypothesis.strategies import (
    booleans,
    fixed_dictionaries,
    from_regex,
    integers,
    lists,
    sampled_from,
    text,
    tuples,
    uuids,
)
from pathvalidate.argparse import validate_filename, validate_filepath

from bayesmark import data
from bayesmark import experiment_launcher as launcher
from bayesmark.builtin_opt.config import CONFIG
from bayesmark.cmd_parse import CmdArgs
from bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES
from hypothesis_util import seeds

# Sorted so the list handed to `sampled_from` has a deterministic order
DATA_NAMES = sorted(data.DATA_LOADERS.keys())


def filepaths():
    """Build a strategy for file paths that start with ``os.sep`` and are accepted by `validate_filepath`."""

    def passes_validation(candidate):
        # Any exception from the validator means the path is rejected
        try:
            validate_filepath(candidate)
        except Exception:
            return False
        else:
            return True

    allowed_chars = "".join((ascii_letters, digits, "_.-~", os.sep))
    raw_paths = text(alphabet=allowed_chars, min_size=1)
    rooted_paths = raw_paths.map(lambda ss: os.sep + ss)
    return rooted_paths.filter(passes_validation)


def filenames(suffix=""):
    """Strategy for file names ending in `suffix` that pass `validate_filename`."""

    def is_valid_name(candidate):
        # Any exception from the validator means the candidate is rejected.
        try:
            validate_filename(candidate)
        except Exception:
            return False
        else:
            return True

    allowed_chars = "".join((ascii_letters, digits, "_.-~"))
    raw_text = text(alphabet=allowed_chars, min_size=1)
    # Append the suffix first, then keep only what the validator accepts.
    return raw_text.map(lambda ss: ss + suffix).filter(is_valid_name)


def joinables():
    """Strategy for valid file names that do not contain `ARG_DELIM`."""

    def has_no_delim(name):
        return ARG_DELIM not in name

    return filenames().filter(has_no_delim)


def datasets():
    """Strategy for data set names: a known loader name or a `reg-`/`clf-` prefixed name."""
    known_names = sampled_from(DATA_NAMES)
    regression_names = from_regex("^reg-[A-Z]*$")
    classification_names = from_regex("^clf-[A-Z]*$")
    return known_names | regression_names | classification_names


def launcher_args(opts, min_jobs=0):
    """Strategy for a dict of launcher arguments keyed by `CmdArgs` members.

    Parameters
    ----------
    opts : list
        Optimizer names that the `CmdArgs.optimizer` entry samples from.
    min_jobs : int
        Lower bound for the generated `CmdArgs.n_jobs` value.
    """

    def nonempty_sample(pool):
        # Non-empty list drawn from `pool`, never longer than the pool itself.
        return lists(sampled_from(pool), min_size=1, max_size=len(pool))

    small_count = integers(1, 100)

    strategies_by_arg = {
        CmdArgs.db_root: filepaths(),
        CmdArgs.optimizer_root: filepaths(),
        CmdArgs.uuid: uuids(),
        CmdArgs.data_root: filepaths(),
        CmdArgs.db: filenames(),
        CmdArgs.optimizer: nonempty_sample(opts),
        CmdArgs.data: lists(datasets(), min_size=1),
        CmdArgs.classifier: nonempty_sample(MODEL_NAMES),
        CmdArgs.metric: nonempty_sample(METRICS),
        CmdArgs.n_calls: small_count,
        CmdArgs.n_suggest: small_count,
        CmdArgs.n_repeat: small_count,
        CmdArgs.n_jobs: integers(min_jobs, 1000),
        CmdArgs.jobs_file: filepaths(),
        CmdArgs.verbose: booleans(),
    }
    return fixed_dictionaries(strategies_by_arg)


def launcher_args_and_config(min_jobs=0):
    """Strategy for `(args, opt_file_lookup)` pairs that share one list of optimizer names."""

    def build_pair(opt_names):
        # Both the args and the name -> "*.py" file lookup are built from the same names.
        arg_strategy = launcher_args(opt_names, min_jobs=min_jobs)
        file_lookup = fixed_dictionaries({name: filenames(suffix=".py") for name in opt_names})
        return tuples(arg_strategy, file_lookup)

    # Make opt names a mix of built in opts and arbitrary names
    builtin_names = sampled_from(sorted(CONFIG.keys()))
    opt_name_lists = lists(joinables() | builtin_names, min_size=1)
    return opt_name_lists.flatmap(build_pair)


def test_is_arg_safe_empty():
    """The empty string must be reported as unsafe, and the result must be a real `bool`."""
    result = launcher._is_arg_safe("")
    assert isinstance(result, bool)
    assert result is False


@given(launcher_args_and_config(), uuids())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_gen_commands(args, run_uuid):
    """Smoke test: `gen_commands` can be fully consumed for any safe argument set."""
    args, opt_file_lookup = args

    # Every plain string argument must be safe.
    string_values = [vv for vv in args.values() if isinstance(vv, str)]
    assume(all(launcher._is_arg_safe(vv) for vv in string_values))

    # List-valued arguments must be safe element-wise and are de-duplicated.
    for list_arg in (CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric):
        assume(all(launcher._is_arg_safe(ss) for ss in args[list_arg]))
        args[list_arg] = list(set(args[list_arg]))

    # Every data set needs at least one requested metric valid for its problem type.
    requested_metrics = set(args[CmdArgs.metric])
    usable_metrics = {
        problem_type: sorted(requested_metrics.intersection(allowed))
        for problem_type, allowed in data.METRICS_LOOKUP.items()
    }
    for dataset in args[CmdArgs.data]:
        assume(len(usable_metrics[data.get_problem_type(dataset)]) > 0)

    commands = list(launcher.gen_commands(args, opt_file_lookup, run_uuid))
    assert commands is not None


@given(launcher_args_and_config(min_jobs=1), uuids(), seeds())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_dry_run(args, run_uuid, seed):
    """Smoke test: `dry_run` writes into an in-memory buffer for any safe argument set."""
    args, opt_file_lookup = args

    # Every plain string argument must be safe.
    string_values = [vv for vv in args.values() if isinstance(vv, str)]
    assume(all(launcher._is_arg_safe(vv) for vv in string_values))

    # List-valued arguments must be safe element-wise and are de-duplicated.
    for list_arg in (CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric):
        assume(all(launcher._is_arg_safe(ss) for ss in args[list_arg]))
        args[list_arg] = list(set(args[list_arg]))

    # Every data set needs at least one requested metric valid for its problem type.
    requested_metrics = set(args[CmdArgs.metric])
    usable_metrics = {
        problem_type: sorted(requested_metrics.intersection(allowed))
        for problem_type, allowed in data.METRICS_LOOKUP.items()
    }
    for dataset in args[CmdArgs.data]:
        assume(len(usable_metrics[data.get_problem_type(dataset)]) > 0)

    job_buffer = StringIO()
    rng = np.random.RandomState(seed)

    # UserWarning is silenced for the duration of the dry run only.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        launcher.dry_run(args, opt_file_lookup, run_uuid, job_buffer, random=rng)

    written = job_buffer.getvalue()
    assert written is not None


================================================
FILE: test/experiment_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import os.path

import numpy as np
from hypothesis import assume, given, settings
from hypothesis.strategies import floats, integers, sampled_from, text
from hypothesis_gufunc.extra.xr import simple_datasets
from hypothesis_gufunc.gufunc import gufunc_args

import bayesmark.experiment as exp
import bayesmark.random_search as rs
from bayesmark import data, np_util
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.builtin_opt.config import CONFIG
from bayesmark.constants import DATA_LOADER_NAMES, ITER, METRICS, MODEL_NAMES, SUGGEST
from bayesmark.sklearn_funcs import SklearnModel, TestFunction
from hypothesis_util import seeds
from util import space_configs


class RandomOptimizer(AbstractOptimizer):
    """Random-search optimizer for tests that can optionally fail at random.

    With ``flaky=True`` both `suggest` and `observe` assert that a fresh uniform draw is at
    most 0.5, so each call may raise `AssertionError`.
    """

    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, random=np_util.random, flaky=False):
        super().__init__(api_config)
        self.random = random
        self.flaky = flaky

    def _maybe_fail(self):
        # Only consumes a random draw when the optimizer is flaky.
        if self.flaky:
            assert self.random.rand() <= 0.5

    def suggest(self, n_suggestions=1):
        self._maybe_fail()
        return rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

    def observe(self, X, y):
        # Random search so there is nothing to learn from the observations.
        self._maybe_fail()


class OutOfBoundsOptimizer(AbstractOptimizer):
    """Random-search optimizer that pushes one numeric value of one suggestion out of range."""

    def __init__(self, api_config, random=np_util.random):
        super().__init__(api_config)
        self.random = random
        # Only "real" and "int" parameters are candidates for the out-of-range value.
        numeric_names = [name for name in api_config if api_config[name]["type"] in ("real", "int")]
        self.param_list = sorted(numeric_names)

    def suggest(self, n_suggestions=1):
        guesses = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

        # Choose which suggestion and which parameter get corrupted.
        row = self.random.randint(0, n_suggestions)
        name = self.random.choice(self.param_list)
        spec = self.api_config[name]

        # Fractional offset for reals, whole-number offset for ints.
        offset = self.random.rand() if spec["type"] == "real" else self.random.randint(1, 10)

        # Coin flip between undershooting the lower end and overshooting the upper end.
        go_below = self.random.rand() <= 0.5
        if go_below:
            guesses[row][name] = spec["range"][0] - offset
        else:
            guesses[row][name] = spec["range"][1] + offset
        return guesses

    def observe(self, X, y):
        pass


class FlakyProblem(TestFunction):
    """Test problem whose `evaluate` asserts a uniform draw is at most 0.5, else returns ``[0.0]``."""

    def __init__(self, api_config, random):
        super().__init__()
        self.random = random
        self.api_config = api_config

    def evaluate(self, params):
        draw = self.random.rand()
        assert draw <= 0.5
        return [0.0]


@given(
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(0, 5),
    integers(1, 3),
    seeds(),
)
@settings(max_examples=10, deadline=None)
def test_run_study(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    """Smoke test `run_study` with a seeded `RandomOptimizer` on a `SklearnModel` problem."""
    # Skip scorers that are not listed for this data set's problem type.
    allowed_metrics = data.METRICS_LOOKUP[data.get_problem_type(dataset)]
    assume(scorer in allowed_metrics)

    problem = SklearnModel(model_name, dataset, scorer)
    opt = RandomOptimizer(problem.get_api_config(), random=np.random.RandomState(seed))
    opt.get_version()

    n_obj = len(problem.objective_names)
    exp.run_study(opt, problem, n_calls, n_suggestions, n_obj=n_obj)


@given(
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(1, 5),
    integers(1, 3),
    seeds(),
)
@settings(deadline=None)
def test_run_study_bounds_fail(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    """`run_study` must raise the out-of-range error when the optimizer leaves the search space.

    The hypothesis deadline is disabled, as it is for the other `SklearnModel`-based study
    tests in this file, so a slow example cannot fail the test on timing alone.
    """
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))
    optimizer.get_version()

    # pytest have some assert failed tools we could use instead, but this is ok for now
    error_message = None  # Stays None when no exception is raised at all
    try:
        exp.run_study(
            optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names)
        )
    except Exception as e:
        error_message = str(e)
    # Comparing the message directly makes a failure show what was actually raised.
    assert error_message == "Optimizer suggestion is out of range."


@given(
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(0, 5),
    integers(1, 3),
    seeds(),
)
@settings(max_examples=10, deadline=None)
def test_run_study_callback(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    """Check the values and iteration counts `run_study` passes to its callback."""
    allowed_metrics = data.METRICS_LOOKUP[data.get_problem_type(dataset)]
    assume(scorer in allowed_metrics)

    problem = SklearnModel(model_name, dataset, scorer)
    opt = RandomOptimizer(problem.get_api_config(), random=np.random.RandomState(seed))
    opt.get_version()
    n_obj = len(problem.objective_names)

    # Row k holds the f_min reported by the callback at iteration k + 1.
    reported_min = np.zeros((n_calls, n_obj), dtype=float)
    seen_iters = []

    def callback(f_min, iters):
        assert f_min.shape == (n_obj,)

        seen_iters.append(iters)
        if iters == 0:
            # Iteration 0 is expected to report inf for every objective.
            assert np.all(f_min == np.inf)
        else:
            reported_min[iters - 1, :] = f_min

    function_evals, _, _ = exp.run_study(opt, problem, n_calls, n_suggestions, n_obj=n_obj, callback=callback)

    assert seen_iters == list(range(n_calls + 1))

    for obj in range(n_obj):
        for call in range(n_calls):
            # The reference point is the argmin of objective 0 over the calls so far.
            best_call, best_suggest = np_util.argmin_2d(function_evals[: call + 1, :, 0])
            assert reported_min[call, obj] == function_evals[best_call, best_suggest, obj]


@given(space_configs(allow_missing=True), integers(0, 5), integers(1, 3), seeds(), seeds())
@settings(deadline=None)
def test_run_study_flaky(api_config, n_calls, n_suggestions, seed1, seed2):
    """`run_study` must complete when both the problem and the optimizer fail at random."""
    # Only the first element of the generated tuple is the API config.
    api_config = api_config[0]

    problem_rng = np.random.RandomState(seed1)
    problem = FlakyProblem(api_config=api_config, random=problem_rng)

    optimizer_rng = np.random.RandomState(seed2)
    opt = RandomOptimizer(api_config, random=optimizer_rng, flaky=True)
    opt.get_version()

    exp.run_study(opt, problem, n_calls, n_suggestions)


@given(
    space_configs(allow_missing=True),
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(0, 5),
    integers(1, 3),
    seeds(),
)
@settings(max_examples=10, deadline=None)
def test_run_sklearn_study(api_config, model_name, dataset, scorer, n_calls, n_suggestions, seed):
    """Smoke test `run_sklearn_study` with `RandomOptimizer` and a seeded random state."""
    allowed_metrics = data.METRICS_LOOKUP[data.get_problem_type(dataset)]
    assume(scorer in allowed_metrics)

    opt_kwargs = {"random": np.random.RandomState(seed)}
    exp.run_sklearn_study(RandomOptimizer, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions)


@given(
    space_configs(allow_missing=True),
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(0, 5),
    integers(1, 3),
)
@settings(max_examples=10, deadline=None)
def test_run_sklearn_study_real(api_config, model_name, dataset, scorer, n_calls, n_suggestions):
    """Smoke test `run_sklearn_study` with every optimizer listed in `CONFIG`."""
    allowed_metrics = data.METRICS_LOOKUP[data.get_problem_type(dataset)]
    assume(scorer in allowed_metrics)

    # Should really do parametric test but for loop good enough
    for name in sorted(CONFIG):
        optimizer_class = exp._get_opt_class(name)
        # opt_root=None should work with built-in opt
        kwargs = exp.load_optimizer_kwargs(name, opt_root=None)

        exp.run_sklearn_study(optimizer_class, kwargs, model_name, dataset, scorer, n_calls, n_suggestions)


@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS))
@settings(deadline=None)
def test_get_objective_signature(model_name, dataset, scorer):
    """Smoke test: `get_objective_signature` runs for every valid model/data/metric triple."""
    allowed_metrics = data.METRICS_LOOKUP[data.get_problem_type(dataset)]
    assume(scorer in allowed_metrics)

    exp.get_objective_signature(model_name, dataset, scorer)


# np.float64 is used instead of the np.float_ alias, which was removed in NumPy 2.0.
@given(gufunc_args("(n,m,k),(k)->()", dtype=[np.float64, str], elements=[floats(), text()], unique=[False, True]))
def test_build_eval_ds(args):
    """Smoke test: `build_eval_ds` accepts an (n, m, k) float array and k unique names."""
    function_evals, objective_names = args
    exp.build_eval_ds(function_evals, objective_names)


# np.float64 is used instead of the np.float_ alias, which was removed in NumPy 2.0.
@given(gufunc_args("(n),(n,m),(n)->()", dtype=np.float64, elements=floats(min_value=0, max_value=1e6)))
def test_build_timing_ds(args):
    """Smoke test: `build_timing_ds` accepts (n,), (n, m) and (n,) arrays of times in [0, 1e6]."""
    suggest_time, eval_time, observe_time = args
    exp.build_timing_ds(suggest_time, eval_time, observe_time)


@given(
    simple_datasets(
        {"int": (ITER, SUGGEST), "real": (ITER, SUGGEST), "binary": (ITER, SUGGEST), "cat": (ITER, SUGGEST)},
        dtype={"int": int, "real": float, "binary": bool, "cat": str},
        min_side=1,
    )
)
def test_build_suggest_ds(suggest_ds):
    """Round trip: a data set unpacked into a suggest log must be rebuilt by `build_suggest_ds`."""
    var_names = list(suggest_ds)

    # All variables share the (ITER, SUGGEST) dims, so the first one gives the sizes.
    n_call, n_suggest = suggest_ds[var_names[0]].values.shape

    # Nested list [call][suggestion] of {variable name: scalar value} dicts.
    suggest_log = [
        [
            {name: suggest_ds[name].sel({ITER: ii, SUGGEST: jj}, drop=True).values.item() for name in var_names}
            for jj in range(n_suggest)
        ]
        for ii in range(n_call)
    ]

    rebuilt_ds = exp.build_suggest_ds(suggest_log)

    assert suggest_ds.equals(rebuilt_ds)


def test_get_opt_class_module():
    """Each optimizer class must be defined in the wrapper file that `CONFIG` names for it."""
    # Should really do parametric test but for loop good enough
    for name in sorted(CONFIG):
        source_path = inspect.getfile(exp._get_opt_class(name))

        # The first element of the CONFIG entry is the wrapper file name.
        expected_file, _ = CONFIG[name]

        assert os.path.basename(source_path) == expected_file


================================================
FILE: test/hypothesis_util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hypothesis import given
from hypothesis.strategies import floats, integers, just, tuples
from hypothesis_gufunc.gufunc import gufunc_args


def identity(x):
    """Return the argument unchanged, for use as a default mapping that does nothing."""
    unchanged = x
    return unchanged


def seeds():
    """Strategy for integer seeds in the inclusive range [0, 2**32 - 1]."""
    largest_seed = (2 ** 32) - 1
    return integers(min_value=0, max_value=largest_seed)


def probs():
    """Strategy for floats in [1e-3, 1 - 1e-3], i.e. kept away from 0 and 1."""
    margin = 1e-3
    return floats(min_value=margin, max_value=1 - margin)


def mfloats():
    """Strategy for floats in the inclusive range [-1e3, 1e3]."""
    limit = 1e3
    return floats(min_value=-limit, max_value=limit)


def gufunc_floats(signature, min_side=0, max_side=5, unique=False, **kwargs):
    """Strategy for float64 argument arrays whose shapes follow a gufunc `signature`.

    Parameters
    ----------
    signature : str
        Shape signature passed through to `gufunc_args`.
    min_side, max_side : int or dict
        Side length limits passed through to `gufunc_args`.
    unique : bool
        Uniqueness flag passed through to `gufunc_args`.
    **kwargs
        Passed to :func:`hypothesis.strategies.floats` to build the element strategy.
    """
    elements = floats(**kwargs)
    # np.float64 rather than the np.float_ alias, which was removed in NumPy 2.0.
    S = gufunc_args(signature, dtype=np.float64, elements=elements, unique=unique, min_side=min_side, max_side=max_side)
    return S


def close_enough(x, y, equal_nan=False, rtol=1e-5, atol=1e-8):
    """Compare two array-likes: same shape, same dtype, and (nearly) equal values.

    Float arrays are compared with `np.allclose` using `rtol`, `atol` and `equal_nan`; every
    other dtype must match exactly. A shape or dtype mismatch gives ``False``.
    """
    # Might want to adjust rtol and atol for lower precision floats
    lhs = np.asarray(x)
    rhs = np.asarray(y)

    if lhs.shape != rhs.shape or lhs.dtype != rhs.dtype:
        return False

    if lhs.dtype.kind != "f":
        return np.all(lhs == rhs)

    assert rhs.dtype.kind == "f"
    # Note: equal_nan only considered in both float case!
    return np.allclose(lhs, rhs, equal_nan=equal_nan, rtol=rtol, atol=atol)


def broadcasted(
    f, signature, itypes, otypes, elements, unique=False, excluded=(), min_side=0, max_side=5, max_dims_extra=2
):
    """Strategy that makes it easy to test the broadcasting semantics of a
    function against the 'ground-truth' broadcasting convention provided by
    :obj:`numpy.vectorize`.

    Parameters
    ----------
    f : callable
        This is the original function that handles broadcasting itself. It
        must return an `ndarray` or multiple `ndarray` (which Python treats as
        a `tuple`) if returning 2-or-more output arguments.
    signature : str
        Signature for shapes to be compatible with. Expects string in format
        of numpy generalized universal function signature, e.g.,
        `'(m,n),(n)->(m)'` for vectorized matrix-vector multiplication.
        Officially, only supporting ascii characters.
    itypes : list-like of dtype
        List of numpy `dtype` for each argument. These can be either strings
        (``'int64'``), type (``np.int64``), or numpy `dtype`
        (``np.dtype('int64')``). A single `dtype` can be supplied for all
        arguments.
    otypes : list of dtype
        The dtype for the outputs of `f`. It must be a list with one dtype
        for each output argument of `f`. It must be a singleton list if `f`
        only returns a single output. It can also be set to `None` to leave it
        to be inferred, but this can create issues with empty arrays, so it is
        not officially supported here.
    elements : list-like of strategy
        Strategies to fill in array elements on a per argument basis. One can
        also specify a single strategy
        (e.g., :func:`hypothesis.strategies.floats`)
        and have it applied to all arguments.
    unique : list-like of bool
        Boolean flag to specify if all elements in an array must be unique.
        One can also specify a single boolean to apply it to all arguments.
    excluded : list-like of integers
        Set of integers representing the positional arguments for which the
        function will not be vectorized. Uses same format as
        :obj:`numpy.vectorize`.
    min_side : int or dict
        Minimum size of any side of the arrays. It is good to test the corner
        cases of 0 or 1 sized dimensions when applicable, but if not, a min
        size can be supplied here. Minimums can be provided on a per-dimension
        basis using a dict, e.g. ``min_side={'n': 2}``. One can use, e.g.,
        ``min_side={hypothesis.extra.gufunc.BCAST_DIM: 2}`` to limit the size
        of the broadcasted dimensions.
    max_side : int or dict
        Maximum size of any side of the arrays. This can usually be kept small
        and still find most corner cases in testing. Dictionaries can be
        supplied as with `min_side`.
    max_dims_extra : int
        Maximum number of extra dimensions that can be appended on left of
        arrays for broadcasting. This should be kept small as the memory used
        grows exponentially with extra dimensions.

    Returns
    -------
    f : callable
        This is the original function that handles broadcasting itself.
    f_vec : callable
        Function that should be functionally equivalent to `f` but
        broadcasting is handled by :obj:`numpy.vectorize`.
    res : tuple of ndarrays
        Resulting ndarrays with shapes consistent with `signature`. Extra
        dimensions for broadcasting will be present.

    Examples
    --------

    .. code-block:: pycon

      >>> import numpy as np
      >>> from hypothesis.strategies import integers, booleans
      >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'],
                      elements=[integers(0,9), booleans()],
                      unique=[True, False]).example()
      (<ufunc 'add'>,
       <numpy.lib.function_base.vectorize at 0x11a777690>,
       (array([5, 6]), array([ True], dtype=bool)))
      >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'],
                      elements=[integers(0,9), booleans()],
                      excluded=(1,)).example()
      (<ufunc 'add'>,
       <numpy.lib.function_base.vectorize at 0x11a715b10>,
       (array([9]), array(True, dtype=bool)))
      >>> f, fv, args = broadcasted(np.add, '(),()->()', ['int64'],
                                    ['int64', 'bool'],
                                    elements=[integers(0,9), booleans()],
                                    min_side=1, max_side=3,
                                    max_dims_extra=1).example()
      >>> f is np.add
      True
      >>> f(*args)
      7
      >>> fv(*args)
      array(7)
    """
    # cache and doc not needed for property testing, excluded not actually
    # needed here because we don't generate extra dims for the excluded args.
    # Using the excluded argument in np.vectorize only seems to confuse it in
    # corner cases.
    vectorized = np.vectorize(f, signature=signature, otypes=otypes)

    arg_strategy = gufunc_args(
        signature,
        itypes,
        elements,
        unique=unique,
        excluded=excluded,
        min_side=min_side,
        max_side=max_side,
        max_dims_extra=max_dims_extra,
    )
    # Both callables are constants; only the argument arrays vary between examples.
    return tuples(just(f), just(vectorized), arg_strategy)


def broadcast_tester(
    f,
    signature,
    otype,
    excluded=(),
    dtype=np.float64,
    elements=None,
    unique=False,
    map_=identity,
    min_side=0,
    max_side=5,
    max_dims_extra=2,
    **kwargs,  # This still confuses flake8
):
    """Build and immediately run a property test of the broadcasting of a single-output `f`.

    The result of `f` is compared with that of its :obj:`numpy.vectorize` counterpart on
    arguments drawn from `broadcasted`. Both results must have dtype `otype` (only the dtype
    kind is compared for str/unicode) and must agree according to `close_enough`.

    Parameters
    ----------
    f : callable
        Function under test.
    signature : str
        Signature passed to `broadcasted`.
    otype : dtype
        Expected output dtype of `f`.
    excluded, unique, min_side, max_side, max_dims_extra
        Passed through to `broadcasted`.
    dtype : dtype
        Input dtype(s), passed to `broadcasted` as `itypes`. The default is `np.float64`
        rather than the `np.float_` alias, which was removed in NumPy 2.0.
    elements : strategy or None
        Element strategy; when `None`, ``floats(**kwargs)`` is used.
    map_ : callable
        Applied to the generated argument tuple before calling the functions.
    **kwargs
        Passed to :func:`hypothesis.strategies.floats` when `elements` is `None`.
    """
    # Build the test for broadcasting with random dimensions
    elements = floats(**kwargs) if elements is None else elements

    @given(
        broadcasted(
            f,
            signature,
            otypes=[otype],
            excluded=excluded,
            itypes=dtype,
            elements=elements,
            unique=unique,
            min_side=min_side,
            max_side=max_side,
            max_dims_extra=max_dims_extra,
        )
    )
    def test_f(bargs):
        f0, f_vec, args = bargs
        args = map_(args)

        R1 = f0(*args)
        R2 = f_vec(*args)

        kind = np.dtype(otype).kind
        if kind in "US":  # Same kind ok for str and unicode dtypes
            assert R1.dtype.kind == kind
            assert R2.dtype.kind == kind
        elif otype is not None:
            assert R1.dtype == otype
            assert R2.dtype == otype
        assert close_enough(R1, R2, equal_nan=True)

    # Call the test
    test_f()


def multi_broadcast_tester(
    f,
    signature,
    otypes,
    excluded=(),
    dtype=np.float64,
    elements=None,
    unique=False,
    map_=identity,
    min_side=0,
    max_side=5,
    max_dims_extra=2,
    **kwargs,
):
    """Build and immediately run a property test of the broadcasting of a multi-output `f`.

    Each output of `f` is compared with the matching output of its :obj:`numpy.vectorize`
    counterpart on arguments drawn from `broadcasted`. Every output must have the dtype
    listed for it in `otypes` (only the dtype kind is compared for str/unicode) and the two
    versions must agree according to `close_enough`.

    Parameters
    ----------
    f : callable
        Function under test, returning one array per entry of `otypes`.
    signature : str
        Signature passed to `broadcasted`.
    otypes : list of dtype
        Expected dtype of each output of `f`.
    excluded, unique, min_side, max_side, max_dims_extra
        Passed through to `broadcasted`.
    dtype : dtype
        Input dtype(s), passed to `broadcasted` as `itypes`. The default is `np.float64`
        rather than the `np.float_` alias, which was removed in NumPy 2.0.
    elements : strategy or None
        Element strategy; when `None`, ``floats(**kwargs)`` is used.
    map_ : callable
        Applied to the generated argument tuple before calling the functions.
    **kwargs
        Passed to :func:`hypothesis.strategies.floats` when `elements` is `None`.
    """
    elements = floats(**kwargs) if elements is None else elements

    @given(
        broadcasted(
            f,
            signature,
            otypes=otypes,
            excluded=excluded,
            itypes=dtype,
            elements=elements,
            unique=unique,
            min_side=min_side,
            max_side=max_side,
            max_dims_extra=max_dims_extra,
        )
    )
    def test_f(bargs):
        f0, f_vec, args = bargs
        args = map_(args)

        R1 = f0(*args)
        R2 = f_vec(*args)
        for rr1, rr2, ot in zip(R1, R2, otypes):
            kind = np.dtype(ot).kind
            if kind in "US":  # Same kind ok for str and unicode dtypes
                # Check the individual outputs: R1 and R2 are the collections of all
                # outputs and have no dtype of their own.
                assert rr1.dtype.kind == kind
                assert rr2.dtype.kind == kind
            else:
                assert rr1.dtype == ot
                assert rr2.dtype == ot
            assert close_enough(rr1, rr2, equal_nan=True)

    # Call the test
    test_f()


================================================
FILE: test/np_util_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hypothesis import assume, given
from hypothesis.strategies import floats, integers, lists

from bayesmark import np_util
from hypothesis_util import broadcast_tester, close_enough, gufunc_floats, seeds


@given(seeds())
def test_random_seed(seed):
    """Smoke test: `random_seed` runs on a seeded `RandomState`."""
    rng = np.random.RandomState(seed)
    np_util.random_seed(rng)


@given(lists(lists(floats())), seeds())
def test_shuffle_2d(X, seed):
    """Smoke test: `shuffle_2d` runs on an arbitrary list of float lists."""
    rng = np.random.RandomState(seed)
    np_util.shuffle_2d(X, rng)


@given(gufunc_floats("(n,m)->()"), integers(1, 5), seeds())
def test_strat_split(X, n_splits, seed):
    """Smoke test: `strat_split` runs both out of place and in place."""
    (X,) = X

    # Out of place first, then in place, each with a freshly seeded generator.
    for inplace in (False, True):
        rng = np.random.RandomState(seed)
        np_util.strat_split(X, n_splits, inplace=inplace, random=rng)


@given(gufunc_floats("(),()->()", allow_nan=False))
def test_isclose_lte_pass(args):
    """`isclose_lte` must accept values that exceed the bound by at most 1e-10."""
    x, y = args
    slack = 1e-10
    capped = np.minimum(x, y + slack)
    assert np_util.isclose_lte(capped, y)


@given(gufunc_floats("(),()->()", allow_nan=False))
def test_isclose_lte_fail(args):
    """`isclose_lte` must reject a value pushed clearly above the bound."""
    x, y = args
    scale = 1e20
    if np.ndim(x) != 0:
        # Array case: only the first element is rewritten to violate the bound.
        y.flat[0] = np.nan_to_num(y.flat[0])
        x.flat[0] = y.flat[0] + scale * np.spacing(np.abs(y.flat[0])) + 1
    else:
        y = np.nan_to_num(y)
        x = y + scale * np.spacing(np.abs(y)) + 1
    assert not np_util.isclose_lte(x, y)


def test_isclose_broadcast():
    """`isclose_lte` must broadcast like its `numpy.vectorize` counterpart on [-1000, 1000]."""
    float_limits = {"min_value": -1000, "max_value": 1000}
    broadcast_tester(np_util.isclose_lte, "(),()->()", otype="bool", **float_limits)


@given(gufunc_floats("(),(),()->()", allow_nan=False))
def test_clip_chk_pass(args):
    """`clip_chk` output must lie inside [lb, ub] for inputs at most 1e-10 outside it."""
    x, lb, ub = args

    assume(lb <= ub)

    slack = 1e-10
    nearly_inside = np.clip(x, lb - slack, ub + slack)
    clipped = np_util.clip_chk(nearly_inside, lb=lb, ub=ub)
    # Clipping again must not change anything.
    assert np.all(clipped == np.clip(clipped, lb, ub))


@given(gufunc_floats("(),(),()->()", allow_nan=True))
def test_clip_chk_pass_nan(args):
    """With `allow_nan=True`, `clip_chk` output must be NaN where the input is, else inside [lb, ub]."""
    x, lb, ub = args

    assume(lb <= ub)

    slack = 1e-10
    nearly_inside = np.clip(x, lb - slack, ub + slack)
    clipped = np_util.clip_chk(nearly_inside, lb=lb, ub=ub, allow_nan=True)

    both_nan = np.isnan(nearly_inside) & np.isnan(clipped)
    within_bounds = clipped == np.clip(clipped, lb, ub)
    assert np.all(both_nan | within_bounds)


@given(gufunc_floats("(n),()->()", allow_nan=False))
def test_snap_to_pass(args):
    """`snap_to` must return exactly `val` for inputs within 1e-10 of it."""
    x, val = args

    slack = 1e-10
    near_val = np.clip(x, val - slack, val + slack)
    snapped = np_util.snap_to(near_val, val)
    assert np.all(snapped == val)


@given(gufunc_floats("(),(),(),()->()", min_value=-1000, max_value=1000))
def test_linear_rescale_bounds(args):
    """`linear_rescale` must map the source end points onto the target end points."""
    a0, b0, a1, b1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([a0, b0])
    lb1, ub1 = sorted([a1, b1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    mapped_lower = np_util.linear_rescale(lb0, lb0, ub0, lb1, ub1)
    assert close_enough(lb1, mapped_lower)

    mapped_upper = np_util.linear_rescale(ub0, lb0, ub0, lb1, ub1)
    assert close_enough(ub1, mapped_upper)


@given(gufunc_floats("(),(),(),(),()->()", min_value=-1000, max_value=1000))
def test_linear_rescale_inner(args):
    """Values inside the source interval must be rescaled into the target interval."""
    X, a0, b0, a1, b1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([a0, b0])
    lb1, ub1 = sorted([a1, b1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    inside = np.clip(X, lb0, ub0)
    rescaled = np_util.linear_rescale(inside, lb0, ub0, lb1, ub1)

    assert np.all(rescaled <= ub1)
    assert np.all(lb1 <= rescaled)


@given(gufunc_floats("(),(),(),(),(),()->()", min_value=-1000, max_value=1000))
def test_linear_rescale_inverse(args):
    """Rescaling to the target interval and back must give back the starting value."""
    X, a0, b0, a1, b1, bounds_draw = args
    # The sign of the extra float draw decides whether bounds are enforced.
    enforce_bounds = bounds_draw >= 0

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([a0, b0])
    lb1, ub1 = sorted([a1, b1])

    assume(lb0 < ub0)
    assume(lb1 < ub1)
    # Can't expect numerics to work well in these extreme cases:
    assume((ub0 - lb0) < 1e3 * (ub1 - lb1))

    if enforce_bounds:
        X = np.clip(X, lb0, ub0)

    forward = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=enforce_bounds)
    round_trip = np_util.linear_rescale(forward, lb1, ub1, lb0, ub0, enforce_bounds=enforce_bounds)

    assert close_enough(round_trip, X)


@given(gufunc_floats("(),(),(),(),()->()", min_value=-1000, max_value=1000))
def test_linear_rescale_bound_modes(args):
    """For in-range input, both `enforce_bounds` modes must give (nearly) the same result."""
    X, a0, b0, a1, b1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted([a0, b0])
    lb1, ub1 = sorted([a1, b1])

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    inside = np.clip(X, lb0, ub0)

    unchecked = np_util.linear_rescale(inside, lb0, ub0, lb1, ub1, enforce_bounds=False)
    checked = np_util.linear_rescale(inside, lb0, ub0, lb1, ub1, enforce_bounds=True)

    assert close_enough(unchecked, checked)


def pair_sort(X, Y):
    """Broadcast `X` and `Y` together and return the element-wise (smaller, larger) pair."""
    broadcast_pair = np.broadcast_arrays(X, Y)
    # Sorting along axis 0 orders each (X, Y) pair element by element.
    ordered = np.sort([broadcast_pair[0], broadcast_pair[1]], axis=0)
    smaller, larger = ordered
    return smaller, larger


def test_linear_rescale_broadcast():
    """`linear_rescale` must broadcast like its `numpy.vectorize` counterpart."""

    def clean_up(args):
        X, a0, b0, a1, b1, bounds_draw = args

        # The sign of the extra float draw decides whether bounds are enforced.
        enforce_bounds = bounds_draw >= 0

        # Ideally, hypothesis should be able to handle constraints like this
        lb0, ub0 = pair_sort(a0, b0)
        lb1, ub1 = pair_sort(a1, b1)

        assume(np.all(lb0 < ub0))
        assume(np.all(lb1 <= ub1))

        if enforce_bounds:
            X = np.clip(X, lb0, ub0)

        return X, lb0, ub0, lb1, ub1, enforce_bounds

    float_limits = {"min_value": -1000, "max_value": 1000}
    broadcast_tester(
        np_util.linear_rescale,
        "(),(),(),(),(),()->()",
        "float64",
        excluded=(5,),
        map_=clean_up,
        **float_limits,
    )


@given(floats(), floats())
def test_isclose(x, y):
    """Test numpy version new enough to avoid broadcasting bug in `np.isclose`.

    See numpy issue 'inconsistency in np.isclose #7014'. We could bump up numpy requirement version and
    eliminate this wrapper.

    See:
    https://github.com/numpy/numpy/issues/7014
    """
    # Same checks for every combination of plain float and 0-d array arguments.
    combos = ((False, False), (True, False), (False, True), (True, True))
    for x_as_array, y_as_array in combos:
        lhs = np.asarray(x) if x_as_array else x
        rhs = np.asarray(y) if y_as_array else y

        z = np.isclose(lhs, rhs)
        assert type(z) == np.bool_
        assert z == np.squeeze(z)
        assert np.isscalar(z)
        assert np.shape(z) == ()


def test_isclose_2():
    """Make sure we are running numpy version where numpy issue 'inconsistency in np.isclose #7014' has been fixed.
    """
    result = np.isclose(0, 1)

    # Two scalar inputs must give a scalar result, not an array.
    assert np.ndim(result) == 0
    assert np.isscalar(result)


@given(gufunc_floats("(n,m)->(2)", allow_nan=False, min_side=1))
def test_argmin_2d_no_nan(args):
    """Without NaNs, `argmin_2d` must index an element no larger than the global minimum."""
    (X,) = args

    row, col = np_util.argmin_2d(X)
    smallest = np.min(X)
    assert X[row, col] <= smallest


@given(gufunc_floats("(n,m)->(2)", allow_nan=True, min_side=1))
def test_argmin_2d_nan(args):
    """`argmin_2d` must index a NaN exactly when the input contains one."""
    (X,) = args

    row, col = np_util.argmin_2d(X)
    picked_is_nan = np.isnan(X[row, col])
    has_nan = np.any(np.isnan(X))
    assert picked_is_nan == has_nan


@given(gufunc_floats("(n,m),(n,m)->(n,m)", allow_nan=False))
def test_cummin(args):
    """`cummin` must return, per column, the value at the last occurrence of the running key minimum."""
    x_val, x_key = args

    n, m = x_val.shape

    c_min = np_util.cummin(x_val, x_key)
    assert c_min.shape == (n, m)

    for row in range(n):
        for col in range(m):
            keys_so_far = x_key[: row + 1, col]
            # Ties are resolved in favor of the latest row.
            last_min = np.where(keys_so_far == keys_so_far.min())[0][-1]
            assert x_key[last_min, col] <= np.min(keys_so_far)

            assert c_min[row, col] == x_val[last_min, col]


@given(gufunc_floats("(n,m),(n,m)->(n,m)", allow_nan=True))
def test_cummin_nan(args):
    """Same brute-force comparison as `test_cummin`, but values may be NaN (keys are cleaned with `nan_to_num`)."""
    x_val, x_key = args

    n, m = x_val.shape

    # Only the values are allowed to stay NaN; the keys must be comparable
    x_key = np.nan_to_num(x_key)

    c_min = np_util.cummin(x_val, x_key)
    assert c_min.shape == (n, m)

    # np.ndindex walks (ii, jj) in the same row-major order as two nested loops
    for ii, jj in np.ndindex(n, m):
        key_prefix = x_key[: ii + 1, jj]
        (min_positions,) = np.where(key_prefix == key_prefix.min())
        last_min = min_positions[-1]
        assert x_key[last_min, jj] <= np.min(key_prefix)

        expected = x_val[last_min, jj]
        if np.isnan(c_min[ii, jj]):
            # NaN never compares equal, so check NaN-ness on both sides instead
            assert np.isnan(expected)
        else:
            assert c_min[ii, jj] == expected


================================================
FILE: test/quantiles_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import scipy.stats as ss
from hypothesis import assume, given
from hypothesis.strategies import floats, integers
from hypothesis_gufunc.gufunc import gufunc_args as gufunc

import bayesmark.quantiles as qt
from hypothesis_util import broadcast_tester, gufunc_floats, multi_broadcast_tester

# Margins that keep generated probabilities strictly inside (0, 1). We could
# use nextafter to get closer to the limits, but that still creates numerics
# issues.
ABOVE0 = 1e-6
BELOW1 = 1 - 1e-6
# Overall false positive rate budget shared by the Monte Carlo checks in
# test_all_mc (split across the individual p-values with a Sidak correction).
GLOBAL_FPR = 0.05

# Hypothesis strategies shared by the tests below.
# Will need to generalize these out to limits before upstreaming:
floats_ = floats(allow_infinity=False, allow_nan=False)
counts = integers(1, 1000)
probs = floats(ABOVE0, BELOW1)


def order_stats_trim(X):
    """Call `qt.order_stats` and strip the `-inf`/`inf` sentinels from the last axis.

    The padded result is checked to be two longer than `X` on the last axis, with `-inf` in the first slot and
    `inf` in the last, so the returned array has the same shape as `X`.
    """
    padded = qt.order_stats(X)

    padded_shape = tuple(X.shape[:-1]) + (X.shape[-1] + 2,)
    assert padded.shape == padded_shape
    assert np.all(padded[..., 0] == -np.inf)
    assert np.all(padded[..., -1] == np.inf)

    trimmed = padded[..., 1:-1]  # Trim out infs
    assert trimmed.shape == tuple(X.shape)
    return trimmed


@given(gufunc_floats("(n)->(m)", allow_nan=False))
def test_order_stats(args):
    """Check `qt.order_stats` returns the sorted sample padded with `-inf` and `inf`."""
    (X,) = args

    padded = qt.order_stats(X)

    # Two sentinels get added
    assert len(padded) == len(X) + 2

    # limit elements
    assert padded[0] == -np.inf
    assert padded[-1] == np.inf

    # test is sorted
    steps = np.diff(padded)
    assert not np.any(steps < 0)

    # equal to equiv versions with lists
    reference = [-np.inf] + sorted(X) + [np.inf]
    assert reference == list(padded)


@given(gufunc_floats("(n)->()", allow_nan=False))
def test_quantile(args):
    """Check `qt.quantile` at the extremes (min/max) and, for odd sample sizes, at the median.

    For an empty sample both extreme quantiles are expected to be `-inf`.
    """
    (X,) = args
    is_empty = len(X) == 0

    ll = qt.quantile(X, np.nextafter(0, 1))
    if is_empty:
        assert ll == -np.inf
    else:
        assert ll == np.min(X)

    uu = qt.quantile(X, np.nextafter(1, 0))
    if is_empty:
        assert uu == -np.inf
    else:
        assert uu == np.max(X)

    # Only compare against the median when the sample size is odd
    if len(X) % 2 == 1:
        assert qt.quantile(X, 0.5) == np.median(X)


@given(gufunc("(n),()->()", dtype=np.float64, elements=[floats_, probs]))
def test_quantile_to_np(args):
    """Cross-check `qt.quantile` against `np.percentile` using the "higher" rule.

    A `-inf` is prepended to the sample before calling numpy to compensate for the off-by-1 difference between
    the two conventions.
    """
    X, q = args

    estimate = qt.quantile(X, q)

    # Correct the off-by-1 error in numpy percentile. This might still have
    # issues due to round off error by multiplying by 100 since powers of 10
    # are not very fp friendly.
    # NOTE(review): newer numpy spells `interpolation=` as `method=`; kept as is
    # so older numpy keeps working -- confirm the minimum supported version.
    estimate_np = np.percentile(np.concatenate(([-np.inf], X)), 100 * q, interpolation="higher")

    assert estimate == estimate_np


@given(gufunc("(n),(),()->(),()", dtype=np.float64, elements=[floats_, probs, probs]))
def test_quantile_CI(args):
    """Check the quantile estimate is bracketed by its confidence interval.

    The ordering is verified both for the outputs of the `_quantile`/`_quantile_CI` helpers and for the values
    returned by `quantile`/`quantile_CI`.
    """
    X, q, alpha = args
    n = len(X)

    idx_q = qt._quantile(n, q)
    idx_l, idx_u = qt._quantile_CI(n, q, alpha)
    assert idx_l <= idx_q
    assert idx_q <= idx_u

    # Lots of checks already inside quantile_CI
    LB, UB = qt.quantile_CI(X, q, alpha)
    assert LB <= UB

    estimate = qt.quantile(X, q)
    assert LB <= estimate
    assert estimate <= UB


@given(gufunc("(n),(),()->(),()", dtype=np.float64, elements=[floats_, probs, probs]))
def test_quantile_CI_monotone_x(args):
    """Check the CI bounds move monotonically when one sample is pushed to `-inf` or `inf`."""
    X, q, alpha = args

    # Need at least one element to overwrite
    assume(len(X) >= 1)

    LB1, UB1 = qt.quantile_CI(X, q, alpha)

    # Lowering a sample can only lower (or keep) both bounds
    X2 = np.copy(X)
    X2[0] = -np.inf
    LB2, UB2 = qt.quantile_CI(X2, q, alpha)
    assert LB1 >= LB2
    assert UB1 >= UB2

    # Raising a sample can only raise (or keep) both bounds
    X2 = np.copy(X)
    X2[0] = np.inf
    LB2, UB2 = qt.quantile_CI(X2, q, alpha)
    assert LB1 <= LB2
    assert UB1 <= UB2


@given(gufunc("(n),(2),()->(),()", dtype=np.float64, elements=[floats_, probs, probs]))
def test_quantile_CI_monotone_q(args):
    """Check both CI bounds are non-decreasing in the quantile level `q`."""
    X, q, alpha = args

    q1, q2 = sorted(q)

    # Lots of checks already inside quantile_CI
    LB1, UB1 = qt.quantile_CI(X, q1, alpha)
    LB2, UB2 = qt.quantile_CI(X, q2, alpha)
    assert LB1 <= LB2
    assert UB1 <= UB2


@given(gufunc("(n),(),(2)->(),()", dtype=np.float64, elements=[floats_, probs, probs]))
def test_quantile_CI_monotone_alpha(args):
    """Check the CI for the smaller `alpha` contains the CI for the larger `alpha`."""
    X, q, alpha = args

    alpha1, alpha2 = sorted(alpha)

    # Lots of checks already inside quantile_CI
    LB1, UB1 = qt.quantile_CI(X, q, alpha1)  # This CI should be larger
    LB2, UB2 = qt.quantile_CI(X, q, alpha2)
    assert LB1 <= LB2
    assert UB1 >= UB2


@given(
    gufunc(
        "(n),(),(),()->()",
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )
)
def test_max_quantile_CI(args):
    """Check `max_quantile_CI` matches `quantile`/`quantile_CI` evaluated at level `q ** (1 / m)`."""
    X, q, m, alpha = args

    estimate0, LB0, UB0 = qt.max_quantile_CI(X, q, m, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    q = q ** (1.0 / m)
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)

    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB


@given(
    gufunc(
        "(n),(),(),()->()",
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )
)
def test_min_quantile_CI(args):
    """Check `min_quantile_CI` matches `quantile`/`quantile_CI` evaluated at level `1 - (1 - q) ** (1 / m)`."""
    X, q, m, alpha = args

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    q = 1.0 - (1.0 - q) ** (1.0 / m)
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)

    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB


@given(
    gufunc(
        "(n),(),(),()->()",
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )
)
def test_min_quantile_CI_to_max(args):
    """Check `min_quantile_CI` on `X` mirrors `max_quantile_CI` on `-X` at level `1 - q`.

    The max version is evaluated at `1 - q` and at `1 - q` shifted by a small epsilon either way; the negated min
    results only need to match one of the three.
    """
    X, q, m, alpha = args

    epsilon = 1e-8  # Small allowance for numerics

    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)

    # Try just above and below to allow for numerics error in case we are
    # just on the boundary.
    estimate1, LB1, UB1 = qt.max_quantile_CI(-X, (1.0 - q) - epsilon, m, alpha)
    estimate2, LB2, UB2 = qt.max_quantile_CI(-X, 1.0 - q, m, alpha)
    estimate3, LB3, UB3 = qt.max_quantile_CI(-X, (1.0 - q) + epsilon, m, alpha)

    if len(X) == 0:
        assert estimate0 == -np.inf  # quantile spec rounds down if n=0
    else:
        assert -estimate0 in (estimate1, estimate2, estimate3)

    # Negation swaps the roles of the lower and upper bounds
    assert -LB0 in (UB1, UB2, UB3)
    assert -UB0 in (LB1, LB2, LB3)


@given(gufunc("(n),(),()->()", dtype=np.float64, elements=[floats_, probs, probs]))
def test_quantile_and_CI(args):
    """Check `quantile_and_CI` agrees with separate calls to `quantile` and `quantile_CI`."""
    X, q, alpha = args

    estimate0, LB0, UB0 = qt.quantile_and_CI(X, q, alpha)
    assert LB0 <= estimate0
    assert estimate0 <= UB0

    # Recompute without using _ internal funcs
    LB, UB = qt.quantile_CI(X, q, alpha=alpha)
    estimate = qt.quantile(X, q)

    assert estimate0 == estimate
    assert LB0 == LB
    assert UB0 == UB


def test_order_stats_broadcast():
    """Run `broadcast_tester` on `order_stats_trim` (order stats with the inf sentinels removed)."""
    broadcast_tester(order_stats_trim, "(n)->(n)", otype="float64", dtype=np.float64, elements=floats_)


def test_quantile_broadcast_0():
    """Run `broadcast_tester` on `qt.quantile` with argument 0 excluded."""
    broadcast_tester(
        qt.quantile, "(n),()->()", otype="float64", excluded=(0,), dtype=np.float64, elements=[floats_, probs]
    )


def test_quantile_broadcast_1():
    """Run `broadcast_tester` on `qt.quantile` with argument 1 excluded."""
    broadcast_tester(
        qt.quantile, "(n),()->()", otype="float64", excluded=(1,), dtype=np.float64, elements=[floats_, probs]
    )


def test_quantile_CI_broadcast_0():
    """Run `multi_broadcast_tester` on `qt.quantile_CI` with argument 0 excluded."""
    multi_broadcast_tester(
        qt.quantile_CI,
        "(n),(),()->(),()",
        otypes=["float64", "float64"],
        excluded=(0,),
        dtype=np.float64,
        elements=[floats_, probs, probs],
    )


def test_quantile_CI_broadcast_1():
    """Run `multi_broadcast_tester` on `qt.quantile_CI` with arguments 1 and 2 excluded."""
    multi_broadcast_tester(
        qt.quantile_CI,
        "(n),(),()->(),()",
        excluded=(1, 2),
        otypes=["float64", "float64"],
        dtype=np.float64,
        elements=[floats_, probs, probs],
    )


def test_max_quantile_CI_broadcast_0():
    """Run `multi_broadcast_tester` on `qt.max_quantile_CI` with argument 0 excluded."""
    multi_broadcast_tester(
        qt.max_quantile_CI,
        "(n),(),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(0,),
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )


def test_max_quantile_CI_broadcast_1():
    """Run `multi_broadcast_tester` on `qt.max_quantile_CI` with arguments 1, 2 and 3 excluded."""
    multi_broadcast_tester(
        qt.max_quantile_CI,
        "(n),(),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(1, 2, 3),
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )


def test_min_quantile_CI_broadcast_0():
    """Run `multi_broadcast_tester` on `qt.min_quantile_CI` with argument 0 excluded."""
    multi_broadcast_tester(
        qt.min_quantile_CI,
        "(n),(),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(0,),
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )


def test_min_quantile_CI_broadcast_1():
    """Run `multi_broadcast_tester` on `qt.min_quantile_CI` with arguments 1, 2 and 3 excluded."""
    multi_broadcast_tester(
        qt.min_quantile_CI,
        "(n),(),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(1, 2, 3),
        dtype=[np.float64, np.float64, np.int_, np.float64],
        elements=[floats_, probs, counts, probs],
    )


def test_quantile_and_CI_broadcast_0():
    """Run `multi_broadcast_tester` on `qt.quantile_and_CI` with argument 0 excluded."""
    multi_broadcast_tester(
        qt.quantile_and_CI,
        "(n),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(0,),
        dtype=np.float64,
        elements=[floats_, probs, probs],
    )


def test_quantile_and_CI_broadcast_1():
    """Run `multi_broadcast_tester` on `qt.quantile_and_CI` with arguments 1 and 2 excluded."""
    multi_broadcast_tester(
        qt.quantile_and_CI,
        "(n),(),()->(),(),()",
        otypes=["float64", "float64", "float64"],
        excluded=(1, 2),
        dtype=np.float64,
        elements=[floats_, probs, probs],
    )


def mc_test_quantile_CI(mc_runs=1000, n=2000, q=0.5, alpha=0.05, random=np.random):
    """Monte Carlo coverage check of `qt.quantile_CI` on standard normal samples.

    Parameters
    ----------
    mc_runs : int
        Number of independent samples (rows) to draw.
    n : int
        Size of each sample.
    q : float
        Quantile level whose CI is tested.
    alpha : float
        Level passed to `qt.quantile_CI`; `1 - alpha` is the coverage used in the binomial test.
    random : module or `np.random.RandomState`
        Source of randomness providing `randn`.

    Returns
    -------
    pval : float
        Binomial CDF of the number of runs whose CI contains the true normal quantile, under a success
        probability of `1 - alpha`. Small values indicate under-coverage.
    """
    q0 = ss.norm.ppf(q)

    X = random.randn(mc_runs, n)
    # Pass alpha through so the interval matches the coverage level tested below
    R = np.array([qt.quantile_CI(xx, q, alpha) for xx in X])
    LB, UB = R[:, 0], R[:, 1]

    n_pass = np.sum((LB <= q0) & (q0 <= UB))
    # This is only a one-sided test
    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)
    return pval


def mc_test_max_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random):
    """Monte Carlo coverage check of `qt.max_quantile_CI` on standard normal samples.

    Returns the binomial CDF of the number of runs whose CI contains the normal quantile at level
    `q ** (1 / m)`, under a success probability of `1 - alpha`.
    """
    # Reference value the interval is supposed to cover
    q0 = ss.norm.ppf(q ** (1.0 / m))

    samples = random.randn(mc_runs, n)
    results = np.array([qt.max_quantile_CI(row, q, m, alpha) for row in samples])
    # Column 0 holds the point estimate, which is not needed here
    lower, upper = results[:, 1], results[:, 2]

    covered = (lower <= q0) & (q0 <= upper)
    # This is only a one-sided test
    return ss.binom.cdf(np.sum(covered), mc_runs, 1 - alpha)


def mc_test_min_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random):
    """Monte Carlo coverage check of `qt.min_quantile_CI` on standard normal samples.

    Returns the binomial CDF of the number of runs whose CI contains the normal quantile at level
    `1 - (1 - q) ** (1 / m)`, under a success probability of `1 - alpha`.
    """
    # Reference value the interval is supposed to cover
    q0 = ss.norm.ppf(1.0 - (1.0 - q) ** (1.0 / m))

    samples = random.randn(mc_runs, n)
    results = np.array([qt.min_quantile_CI(row, q, m, alpha) for row in samples])
    # Column 0 holds the point estimate, which is not needed here
    lower, upper = results[:, 1], results[:, 2]

    covered = (lower <= q0) & (q0 <= upper)
    # This is only a one-sided test
    return ss.binom.cdf(np.sum(covered), mc_runs, 1 - alpha)


def test_all_mc():
    """Run every Monte Carlo coverage check and require all p-values to clear a Sidak-corrected threshold.

    A single seeded random stream is shared by all checks, so the order of the calls matters for
    reproducibility.
    """
    random = np.random.RandomState(8623)

    levels = (0.3, 0.5, 0.99)
    mc_checks = (mc_test_quantile_CI, mc_test_max_quantile_CI, mc_test_min_quantile_CI)
    # Outer loop over the checks, inner loop over the levels
    pvals = [mc_check(q=q, random=random) for mc_check in mc_checks for q in levels]

    n_tests = len(pvals)
    SIDAK_FPR = 1.0 - (1.0 - GLOBAL_FPR) ** (1.0 / n_tests)
    assert np.min(pvals) >= SIDAK_FPR

    return pvals


================================================
FILE: test/random_search_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hypothesis import given, settings
from hypothesis.strategies import integers

import bayesmark.space as sp
from bayesmark.np_util import linear_rescale
from bayesmark.random_search import suggest_dict
from hypothesis_util import close_enough, gufunc_floats, seeds
from util import space_configs


@given(space_configs(allow_missing=True), integers(min_value=1, max_value=8), seeds())
@settings(deadline=None)
def test_random_search_suggest_sanity(api_args, n_suggest, seed):
    """Check `suggest_dict` ignores the history (same seed gives same guesses) and stays inside the bounds.

    The generated history is split in two halves; both calls use the same seed, so the suggestions are expected
    to be identical. The warped suggestions must be a float array of shape `(n_suggest, D)` lying between the
    bounds reported by the joint space.
    """
    meta, X, y, _ = api_args

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    S.validate(X)

    N = len(X)
    # Split history and call twice with diff histories but same seed
    M = N // 2
    X1, X2 = X[:M], X[M:]
    y1, y2 = y[:M], y[M:]

    x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed))
    x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed))

    # Check types too
    assert len(x_guess) == n_suggest
    assert all(all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn]) for nn in range(len(x_guess)))
    assert np.all(x_guess == x_guess2)
    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    (D,) = lower.shape
    x_guess_w = S.warp(x_guess)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    # Both sides of the box must hold (the lower side was previously left unchecked)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)


@given(
    gufunc_floats("(n,D),(n)->()", min_value=0.0, max_value=1.0, min_side={"D": 1}),
    integers(min_value=1, max_value=10),
    seeds(),
)
@settings(deadline=None)
def test_random_search_suggest_diff(api_args, n_suggest, seed):
    """Check `suggest_dict` gives different suggestions for different seeds, all valid and within bounds."""
    # Hard to know how many iters needed for arbitrary space that we need to
    # run so that we don't get dupes by chance. So, for now, let's just stick
    # with this simple space.
    dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]}

    X_w, y = api_args

    D = X_w.shape[1]
    # One name per column, so the space always has exactly D dimensions (a fixed
    # count of names would silently truncate the zip when D is larger).
    param_names = ["x%d" % ii for ii in range(D)]
    meta = dict(zip(param_names, [dim] * D))

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    X = S.unwarp(X_w)
    S.validate(X)

    seed = seed // 2  # Keep in bounds even after add 7

    x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed))
    # Use diff seed to intentionally get diff result
    x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7))

    # Check types too
    assert len(x_guess) == n_suggest
    assert len(x_guess2) == n_suggest
    assert not np.all(x_guess == x_guess2)
    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    (D,) = lower.shape

    for guess in (x_guess, x_guess2):
        x_guess_w = S.warp(guess)
        assert type(x_guess_w) == np.ndarray
        assert x_guess_w.dtype.kind == "f"
        assert x_guess_w.shape == (n_suggest, D)
        # Both sides of the box must hold (the lower side was previously left unchecked)
        assert np.all(lower <= x_guess_w)
        assert np.all(x_guess_w <= upper)


================================================
FILE: test/serialize_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from string import ascii_letters, digits

from hypothesis import given
from hypothesis.strategies import lists, text, uuids
from pathvalidate.argparse import validate_filename, validate_filepath

from bayesmark.serialize import XRSerializer


def filepaths():
    """Build a hypothesis strategy for absolute file paths accepted by `validate_filepath`.

    Candidates are non-empty strings over letters, digits, `_.-~` and the path separator, prefixed with
    `os.sep`; anything `validate_filepath` raises on is filtered out.
    """

    def valid(candidate):
        try:
            validate_filepath(candidate)
        except Exception:
            return False
        else:
            return True

    alphabet = ascii_letters + digits + "_.-~" + os.sep
    raw_text = text(alphabet=alphabet, min_size=1)
    return raw_text.map(lambda tail: os.sep + tail).filter(valid)


def filenames(suffix=""):
    """Build a hypothesis strategy for file names accepted by `validate_filename`.

    Candidates are non-empty strings over letters, digits and `_.-~` with `suffix` appended; anything
    `validate_filename` raises on is filtered out.
    """

    def valid(candidate):
        try:
            validate_filename(candidate)
        except Exception:
            return False
        else:
            return True

    alphabet = ascii_letters + digits + "_.-~"
    raw_text = text(alphabet=alphabet, min_size=1)
    return raw_text.map(lambda stem: stem + suffix).filter(valid)


@given(filepaths(), lists(filenames()), filenames())
def test_init_db_manual(db_root, keys, db):
    """Smoke test: `XRSerializer.init_db_manual` must not raise on valid paths and names."""
    result = XRSerializer.init_db_manual(db_root, keys, db)
    del result  # Only the absence of an exception matters here


@given(uuids())
def test_uuid_to_fname(uu):
    """Check the uuid -> file name -> uuid round trip, and that re-encoding gives the same file name."""
    fname = XRSerializer._uuid_to_fname(uu)
    recovered = XRSerializer._fname_to_uuid(fname)
    assert uu == recovered

    fname_again = XRSerializer._uuid_to_fname(recovered)
    assert fname == fname_again


@given(filenames())
def test_key_to_fname(key):
    """Check the key -> file name -> key round trip recovers the original key."""
    fname = XRSerializer._key_to_fname(key)
    recovered = XRSerializer._fname_to_key(fname)
    assert key == recovered


@given(filepaths(), lists(filenames()), filenames())
def test_validate(db_root, keys, db):
    """Smoke test: `XRSerializer._validate` must not raise on valid paths and names."""
    result = XRSerializer._validate(db_root, keys, db)
    del result  # Only the absence of an exception matters here


================================================
FILE: test/signatures_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import random as pyrandom
import warnings

import numpy as np
from hypothesis import given
from hypothesis.strategies import dictionaries, floats, lists, text, tuples

import bayesmark.signatures as ss
from bayesmark.experiment import OBJECTIVE_NAMES
from util import space_configs

# Exact length of every signature list generated by `bsigs` below.
N_SIG = ss.N_SUGGESTIONS


def bsigs():
    """Strategy for one signature: a list of exactly `N_SIG` finite floats."""
    finite_floats = floats(allow_infinity=False, allow_nan=False)
    return lists(finite_floats, min_size=N_SIG, max_size=N_SIG)


def sigs():
    """Strategy for a non-empty list of signatures as produced by `bsigs`."""
    return lists(bsigs(), min_size=1)


def sig_pair():
    """Strategy for a `(signatures, signatures_ref)` pair of dicts.

    Every generated key has a reference signature; a key also gets an entry in `signatures` only when the
    underlying tuple has two elements.
    """

    def separate(D):
        signatures = {}
        signatures_ref = {}
        for kk, entry in D.items():
            n_entry = len(entry)
            if n_entry == 1:
                # Reference only
                (v_ref,) = entry
                signatures_ref[kk] = np.asarray(v_ref)
            elif n_entry == 2:
                v, v_ref = entry
                signatures[kk] = np.asarray(v)
                signatures_ref[kk] = np.asarray(v_ref)
            else:
                assert False
        return signatures, signatures_ref

    one_or_two = tuples(bsigs()) | tuples(bsigs(), bsigs())
    return dictionaries(text(), one_or_two).map(separate)


def some_mock_f(x):
    """Some arbitrary deterministic test function.

    The JSON dump of `x` (with sorted keys) seeds a private random stream, from which one Gaussian draw is
    taken per entry of `OBJECTIVE_NAMES`.
    """
    seed_text = json.dumps(x, sort_keys=True)
    stream = pyrandom.Random(seed_text)
    return [stream.gauss(0, 1) for _ in OBJECTIVE_NAMES]


@given(space_configs())
def test_get_func_signature(api_config):
    """Smoke test: `get_func_signature` must run on `some_mock_f` and return an (x, y) signature pair."""
    # space_configs yields a 4-tuple; only the first element (the config) is needed
    api_config = api_config[:4][0] if len(api_config) == 4 else _unpack_four(api_config)

    signature_x, signature_y = ss.get_func_signature(some_mock_f, api_config)


def _unpack_four(values):
    """Return the first of exactly four values, raising `ValueError` like tuple unpacking would."""
    first, _, _, _ = values
    return first


@given(dictionaries(text(), sigs()))
def test_analyze_signatures(signatures):
    """Smoke test: `analyze_signatures` must run and return two values (RuntimeWarnings are silenced)."""
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        analysis = ss.analyze_signatures(signatures)
        sig_errs, signatures_median = analysis


@given(sig_pair())
def test_analyze_signature_pair(args):
    """Smoke test: `analyze_signature_pair` must run and return two values (RuntimeWarnings are silenced)."""
    signatures, signatures_ref = args
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        analysis = ss.analyze_signature_pair(signatures, signatures_ref)
        sig_errs, signatures_pair = analysis


================================================
FILE: test/sklearn_funcs_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pickle as pkl

import numpy as np
from hypothesis import assume, given, settings
from hypothesis.strategies import sampled_from, text
from sklearn.linear_model import LinearRegression

from bayesmark import data
from bayesmark import sklearn_funcs as skf
from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES
from bayesmark.random_search import suggest_dict
from bayesmark.space import JointSpace
from hypothesis_util import seeds


@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())
@settings(deadline=None)
def test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed):
    """Evaluate a `SklearnModel` on one random suggestion and check the loss is a tuple of floats.

    Metric/dataset combinations that do not fit the dataset's problem type are skipped via `assume`.
    """
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed)
    api_config = test_prob.get_api_config()

    rng = np.random.RandomState(rs_seed)
    (x_guess,) = suggest_dict([], [], api_config, n_suggestions=1, random=rng)

    loss = test_prob.evaluate(x_guess)

    assert isinstance(loss, tuple)
    assert all(isinstance(value, float) for value in loss)
    # One loss value per objective
    assert np.shape(loss) == np.shape(test_prob.objective_names)


@given(text(), text(), text())
def test_inverse_test_case_str(model, dataset, scorer):
    """Check `inverse_test_case_str` undoes `test_case_str` when no part contains the delimiter."""
    combined = model + dataset + scorer
    assume(ARG_DELIM not in combined)

    test_case = skf.SklearnModel.test_case_str(model, dataset, scorer)
    recovered = skf.SklearnModel.inverse_test_case_str(test_case)

    assert recovered == (model, dataset, scorer)


@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())
@settings(deadline=None)
def test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed):
    """Check `SklearnSurrogate` reproduces the predictions of the pickled regressor it is given.

    A linear regression is fit on random targets at random suggestions, pickled to bytes and handed to the
    surrogate; evaluating the surrogate at the first suggestion must match the regressor's own prediction.
    """
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=0)
    api_config = test_prob.get_api_config()
    space = JointSpace(api_config)

    n_suggestions = 20
    n_obj = len(test_prob.objective_names)

    suggest_rng = np.random.RandomState(rs_seed)
    x_guess = suggest_dict([], [], api_config, n_suggestions=n_suggestions, random=suggest_rng)
    x_guess_w = space.warp(x_guess)

    # Arbitrary targets: the surrogate only needs something to fit
    target_rng = np.random.RandomState(model_seed)
    y = target_rng.randn(n_suggestions, n_obj)

    reg = LinearRegression()
    reg.fit(x_guess_w, y)
    expected_losses = reg.predict(x_guess_w)

    path = pkl.dumps(reg)
    del reg  # From here on only the pickled copy is used
    assert isinstance(path, bytes)

    test_prob_surr = skf.SklearnSurrogate(model, dataset, metric, path)
    loss = test_prob_surr.evaluate(x_guess[0])

    assert isinstance(loss, tuple)
    assert all(isinstance(value, float) for value in loss)
    assert np.shape(loss) == np.shape(test_prob.objective_names)

    assert np.allclose(expected_losses[0], np.array(loss))


================================================
FILE: test/space_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hypothesis import assume, given
from hypothesis.extra.numpy import from_dtype
from hypothesis.strategies import booleans, floats, integers, just, lists, sampled_from
from hypothesis_gufunc.gufunc import gufunc_args as gufunc
from scipy.interpolate import interp1d
from sklearn.preprocessing import LabelBinarizer

import bayesmark.space as sp
from bayesmark.np_util import linear_rescale
from bayesmark.space import CAT_DTYPE, CAT_KIND, CAT_NATIVE_DTYPE
from hypothesis_util import broadcast_tester, close_enough, gufunc_floats
from util import space_configs

# Range of numpy's "i" integer type, used to bound the generated integers.
INT_MIN = np.iinfo("i").min
INT_MAX = np.iinfo("i").max

# NOTE(review): WARPS is not referenced in the part of the file visible here -- presumably used by later tests.
WARPS = ("logit", "linear", "bilog", "log")
# dtype names sampled for the `dtype` argument of `sp.encode`.
ENCODER_DTYPES = ("bool", "int", "float")


def encoder_gen(args):
    """Post-process generated arguments into valid inputs for `sp.encode`.

    The raw integers are wrapped around the number of labels and turned into actual labels, the labels are
    sorted when `assume_sorted` is set, and the dtype is unwrapped from its numpy container.
    """
    raw_idx, labels, assume_sorted, dtype, assume_valid = args

    labels = np.sort(labels) if assume_sorted else labels
    # Modulo keeps every index inside the label array
    X = labels[raw_idx % len(labels)]
    # np.array does not like np.array(dtype)
    return X, labels, assume_sorted, dtype.item(), assume_valid


def decoder_gen(args):
    """Post-process generated arguments for the decode tests.

    Labels are sorted when `assume_sorted` is set and the dtype is unwrapped from its numpy container; the
    other arguments pass through untouched.
    """
    Y, labels, assume_sorted, dtype, assume_valid = args

    labels = np.sort(labels) if assume_sorted else labels
    return Y, labels, assume_sorted, dtype.item(), assume_valid


def decoder_gen_broadcast(args):
    """Three-argument variant of `decoder_gen`: sort the labels when `assume_sorted` is set."""
    Y, labels, assume_sorted = args

    labels = np.sort(labels) if assume_sorted else labels
    return Y, labels, assume_sorted


@given(
    gufunc(
        "(),(n),(),(),()->(n)",
        dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_],
        elements=[
            integers(0, INT_MAX),
            from_dtype(np.dtype(CAT_DTYPE)),
            booleans(),
            sampled_from(ENCODER_DTYPES),
            booleans(),
        ],
        unique=[False, True, False, False, False],
        min_side={"n": 1},
    ).map(encoder_gen)
)
def test_encode_decode(args):
    """Check that decoding an encoded label gives the label back, and that the encoding has the right dtype."""
    X, labels, assume_sorted, dtype, assume_valid = args

    Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)
    if assume_sorted:  # otherwise labels will be re-arranged
        # Exactly one position may be active, and it must point at the label
        (active,) = np.where(Y > 0)
        (idx,) = active
        assert np.asarray(labels[idx]) == X
    assert Y.dtype == dtype

    X_round_trip = sp.decode(Y, labels, assume_sorted=assume_sorted)

    assert close_enough(X, X_round_trip)


@given(
    gufunc(
        "(m),(n),(),(),()->(n)",
        dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_],
        elements=[
            integers(0, INT_MAX),
            from_dtype(np.dtype(CAT_DTYPE)),
            booleans(),
            sampled_from(ENCODER_DTYPES),
            booleans(),
        ],
        unique=[False, True, False, False, False],
        min_side={"m": 1, "n": 3},
    ).map(encoder_gen)
)
def test_encoder_to_sklearn(args):
    """Check `sp.encode` agrees with sklearn's `LabelBinarizer` (cast to the requested dtype)."""
    # sklearn cannot handle this correctly unless n >= 3
    X, labels, assume_sorted, dtype, assume_valid = args

    Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)

    binarizer = LabelBinarizer()
    binarizer.fit(labels)
    Y_sklearn = binarizer.transform(X)

    assert close_enough(Y, Y_sklearn.astype(dtype))


@given(
    gufunc(
        "(m,n),(n),(),(),()->(n)",
        dtype=[np.float64, CAT_DTYPE, np.bool_, str, np.bool_],
        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()],
        unique=[False, True, False, False, False],
        min_side={"n": 1},
    ).map(decoder_gen)
)
def test_decode_encode(args):
    """Check that re-encoding a decoded array gives a one-hot array with the same argmax per row."""
    Y, labels, assume_sorted, dtype, assume_valid = args

    assert Y.ndim >= 1 and Y.shape[-1] == len(labels)

    X = sp.decode(Y, labels, assume_sorted=assume_sorted)
    Y2 = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)

    # The encoding is defined as the argmax
    assert np.all(Y.argmax(axis=1) == Y2.argmax(axis=1))
    # Exactly one non-zero entry per row, and that entry equals one
    assert np.all(np.sum(Y2 != 0, axis=1) == 1)
    assert np.all(np.sum(Y2 == 1, axis=1) == 1)


@given(
    gufunc(
        "(m,n),(n),(),(),()->(n)",
        dtype=[np.float64, CAT_DTYPE, np.bool_, str, np.bool_],
        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()],
        unique=[False, True, False, False, False],
        min_side={"m": 1, "n": 3},
    ).map(decoder_gen)
)
def test_decode_to_sklearn(args):
    """Check `sp.decode` agrees with the inverse transform of sklearn's `LabelBinarizer`."""
    Y, labels, assume_sorted, dtype, assume_valid = args

    assert Y.ndim >= 1 and Y.shape[-1] == len(labels)

    X = sp.decode(Y, labels, assume_sorted=assume_sorted)

    enc = LabelBinarizer()
    enc.fit(labels)
    X2 = enc.inverse_transform(Y)

    assert X.dtype.kind == CAT_KIND
    # sklearn may hand back a different string dtype, so cast before comparing
    assert close_enough(X, X2.astype(X.dtype))


def test_encode_broadcast_bool():
    """Run `broadcast_tester` on `sp.encode` with a bool output dtype; only argument 0 is broadcast."""
    input_dtypes = [np.int_, CAT_DTYPE, np.bool_, object, np.bool_]
    input_elements = [
        integers(0, INT_MAX),
        from_dtype(np.dtype(CAT_DTYPE)),
        booleans(),
        just("bool"),
        booleans(),
    ]
    broadcast_tester(
        sp.encode,
        "(),(n),(),(),()->(n)",
        otype=bool,
        excluded=(1, 2, 3, 4),
        dtype=input_dtypes,
        elements=input_elements,
        unique=[False, True, False, False, False],
        min_side={"n": 1},
        map_=encoder_gen,
    )


def test_encode_broadcast_int():
    """Run `broadcast_tester` on `sp.encode` with an int output dtype; only argument 0 is broadcast."""
    input_dtypes = [np.int_, CAT_DTYPE, np.bool_, object, np.bool_]
    input_elements = [
        integers(0, INT_MAX),
        from_dtype(np.dtype(CAT_DTYPE)),
        booleans(),
        just("int"),
        booleans(),
    ]
    broadcast_tester(
        sp.encode,
        "(),(n),(),(),()->(n)",
        otype=int,
        excluded=(1, 2, 3, 4),
        dtype=input_dtypes,
        elements=input_elements,
        unique=[False, True, False, False, False],
        min_side={"n": 1},
        map_=encoder_gen,
    )


def test_encode_broadcast_float():
    """Run `broadcast_tester` on `sp.encode` with a float output dtype; only argument 0 is broadcast."""
    input_dtypes = [np.int_, CAT_DTYPE, np.bool_, object, np.bool_]
    input_elements = [
        integers(0, INT_MAX),
        from_dtype(np.dtype(CAT_DTYPE)),
        booleans(),
        just("float"),
        booleans(),
    ]
    broadcast_tester(
        sp.encode,
        "(),(n),(),(),()->(n)",
        otype=float,
        excluded=(1, 2, 3, 4),
        dtype=input_dtypes,
        elements=input_elements,
        unique=[False, True, False, False, False],
        min_side={"n": 1},
        map_=encoder_gen,
    )


def test_decode_broadcast_bool():
    """Broadcast check for `sp.decode` when the encoded input is boolean."""
    # One list entry per input in the gufunc signature used below
    arg_dtypes = [np.bool_, CAT_DTYPE, np.bool_]
    arg_elements = [booleans(), from_dtype(np.dtype(CAT_DTYPE)), booleans()]
    arg_unique = [False, True, False]

    broadcast_tester(
        sp.decode,
        "(m,n),(n),()->(m)",
        otype=CAT_DTYPE,
        excluded=(1, 2),
        dtype=arg_dtypes,
        elements=arg_elements,
        unique=arg_unique,
        min_side={"n": 1},
        map_=decoder_gen_broadcast,
    )


def test_decode_broadcast_int():
    """Broadcast check for `sp.decode` when the encoded input is integer valued."""
    # One list entry per input in the gufunc signature used below
    arg_dtypes = [np.int_, CAT_DTYPE, np.bool_]
    arg_elements = [integers(INT_MIN, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans()]
    arg_unique = [False, True, False]

    broadcast_tester(
        sp.decode,
        "(m,n),(n),()->(m)",
        otype=CAT_DTYPE,
        excluded=(1, 2),
        dtype=arg_dtypes,
        elements=arg_elements,
        unique=arg_unique,
        min_side={"n": 1},
        map_=decoder_gen_broadcast,
    )


def test_decode_broadcast_float():
    """Broadcast check for `sp.decode` when the encoded input is float valued.

    The float dtype is spelled ``np.float64`` because the equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    broadcast_tester(
        sp.decode,
        "(m,n),(n),()->(m)",
        otype=CAT_DTYPE,
        excluded=(1, 2),
        # One list entry per input in the signature above
        dtype=[np.float64, CAT_DTYPE, np.bool_],
        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans()],
        unique=[False, True, False],
        min_side={"n": 1},
        map_=decoder_gen_broadcast,
    )


@given(gufunc("()->()", dtype=np.float64, elements=floats()))
def test_bilog_props(args):
    """Check basic properties of `sp.bilog`: zero at zero, odd symmetry, and finiteness is preserved.

    The dtype is spelled ``np.float64`` because the equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    x, = args

    y = sp.bilog(x)

    assert sp.bilog(0) == 0  # This could be its own test
    # Odd symmetry, where a nan on both sides counts as a match
    assert close_enough(y, -sp.bilog(-x), equal_nan=True)
    # Output is finite exactly when the input is finite
    assert np.isfinite(y) == np.isfinite(x)


@given(gufunc_floats("(2)->(2)", allow_infinity=False, allow_nan=False))
def test_bilog_monotonic(args):
    """`sp.bilog` is increasing on non-negative inputs, checked with a small slack on the larger point."""
    (x,) = args

    smaller, larger = sorted(np.abs(x))

    # Push the larger point up slightly so a strict inequality holds even when both points tie
    larger_nudged = (1 + 1e-6) * larger + 1e-6
    assert sp.bilog(smaller) < sp.bilog(larger_nudged)


@given(gufunc("()->()", dtype=np.float64, elements=floats()))
def test_bilog_biexp(args):
    """Check that `sp.biexp` undoes `sp.bilog`, treating nan as equal to nan.

    The dtype is spelled ``np.float64`` because the equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    x, = args

    assert close_enough(sp.biexp(sp.bilog(x)), x, equal_nan=True)


def test_bilog_broadcast():
    """Broadcast check for `sp.bilog` as an elementwise function with float output."""
    elementwise = "()->()"
    broadcast_tester(sp.bilog, elementwise, otype=float)


def test_biexp_broadcast():
    """Broadcast check for `sp.biexp` as an elementwise function, with inputs limited to [-10, 10]."""
    elementwise = "()->()"
    broadcast_tester(sp.biexp, elementwise, otype=float, min_value=-10, max_value=10)


@given(sampled_from(WARPS), gufunc_floats("(n),(m)->(n)", allow_infinity=False, allow_nan=False))
def test_real_values_warp_unwarp(warp, args):
    """Round trip x -> warp -> unwarp for a `sp.Real` space defined by a list of allowed values."""
    x, values = args

    # Drop values that fall outside the domain of the warp
    if warp == "log":
        values = values[values > 0]
    elif warp == "logit":
        values = values[(values > 0) & (values < 1)]

    # Splitting out separate tests for log and logit with a unique flag would avoid
    # the need for this, but it works as is
    grid = np.unique(values)
    assume(len(grid) >= 2)

    # Snap every point of x onto the nearest allowed value
    snap = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")
    x = snap(x)
    assert x.ndim == 1  # make sure interp1d did not mess it up

    space = sp.Real(warp=warp, values=values)
    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


@given(sampled_from(WARPS), gufunc_floats("(n),(2)->(n)", allow_infinity=False, allow_nan=False))
def test_real_range_warp_unwarp(warp, args):
    """Round trip x -> warp -> unwarp for a `sp.Real` space defined by a (lower, upper) range."""
    x, range_ = args

    # Drop end points that fall outside the domain of the warp
    if warp == "log":
        range_ = range_[range_ > 0]
    elif warp == "logit":
        range_ = range_[(range_ > 0) & (range_ < 1)]

    range_ = np.sort(range_)
    # Both end points must have survived the pruning and form a non-empty interval
    assume(len(range_) == 2 and range_[0] < range_[1])

    x = np.clip(x, range_[0], range_[1])

    space = sp.Real(warp=warp, range_=range_)

    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


# To really stress test this we should eliminate the min and max value limits, but
# that requires splitting out a different test function for log and logit
@given(sampled_from(WARPS), gufunc_floats("(n,1),(2)->(n)", min_value=-1000, max_value=1000))
def test_real_range_unwarp_warp(warp, args):
    """Round trip warped -> unwarp -> warp for a `sp.Real` space defined by a (lower, upper) range."""
    x_w, range_ = args

    # Drop end points that fall outside the domain of the warp
    if warp == "log":
        range_ = range_[range_ > 0]
    elif warp == "logit":
        range_ = range_[(range_ > 0) & (range_ < 1)]

    range_ = np.sort(range_)
    # Both end points must have survived the pruning and form a non-empty interval
    assume(len(range_) == 2 and range_[0] < range_[1])

    range_warped = sp.WARP_DICT[warp](range_)

    x_w = np.clip(x_w, range_warped[0], range_warped[1])

    space = sp.Real(warp=warp, range_=range_)

    # Test bounds
    lower, upper = space.get_bounds().T
    x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper)

    x = space.unwarp(x_w)
    assert x_w.shape == x.shape + (1,)
    assert x.dtype == range_.dtype
    assert x.dtype == space.dtype

    x_valid = space.validate(x)
    assert close_enough(x, x_valid)

    x_w_back = space.warp(x)
    assert x_w_back.shape == x_w.shape
    x_w_back_valid = space.validate_warped(x_w_back)
    assert close_enough(x_w_back, x_w_back_valid)

    assert close_enough(x_w, x_w_back)


@given(
    sampled_from(("linear", "bilog")),
    gufunc("(n),(m)->(n)", dtype=np.int_, elements=integers(INT_MIN, INT_MAX), unique=[False, True], min_side={"m": 2}),
)
def test_int_values_warp_unwarp(warp, args):
    """Round trip x -> warp -> unwarp for a `sp.Integer` space defined by a list of allowed values."""
    x, values = args

    grid = np.unique(values)  # Also sort
    assert len(grid) >= 2
    # Snap every point of x onto the nearest allowed value, then go back to the integer dtype
    snap = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")
    x = snap(x).astype(values.dtype)
    assert x.ndim == 1  # make sure interp1d did not mess it up

    space = sp.Integer(warp=warp, values=values)
    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


@given(gufunc("(n),(m)->(n)", dtype=np.int_, elements=integers(1, INT_MAX), unique=[False, True], min_side={"m": 2}))
def test_log_int_values_warp_unwarp(args):
    """Round trip x -> warp -> unwarp for a log-warped `sp.Integer` space defined by a list of values."""
    x, values = args

    grid = np.unique(values)  # Also sort
    assert len(grid) >= 2
    # Snap every point of x onto the nearest allowed value, then go back to the integer dtype
    snap = interp1d(grid, grid, kind="nearest", fill_value="extrapolate")
    x = snap(x).astype(values.dtype)
    assert x.ndim == 1  # make sure interp1d did not mess it up

    space = sp.Integer(warp="log", values=values)
    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


@given(sampled_from(("linear", "bilog", "log")), gufunc("(n),(2)->(n)", dtype=np.int_, elements=integers(-1000, 1000)))
def test_int_range_warp_unwarp(warp, args):
    """Round trip x -> warp -> unwarp for a `sp.Integer` space defined by a (lower, upper) range.

    Warning: this explicitly ignores issues with min max if going to int
    limit, since
    >>> np.array(INT_MAX).astype(np.float32).astype(np.int32)
    array(-2147483648, dtype=int32)
    Without any warning from numpy.
    """
    x, range_ = args

    # Log could be split out into a different function without this pruning
    # if we start failing the hypothesis health check.
    if warp == "log":
        range_ = range_[range_ > 0]

    range_ = np.sort(range_)
    # Both end points must have survived the pruning and form a non-empty interval
    assume(len(range_) == 2 and range_[0] < range_[1])

    x = np.clip(x, range_[0], range_[1]).astype(range_.dtype)

    space = sp.Integer(warp=warp, range_=range_)
    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert x.dtype == x_back.dtype
    # Close enough when evaluated as floats
    assert close_enough(x.astype("f"), x_back.astype("f"))


@given(gufunc("(n)->(n)", dtype=np.bool_, elements=booleans()))
def test_bool_warp_unwarp(args):
    """Round trip x -> warp -> unwarp for a `sp.Boolean` space."""
    (x,) = args

    space = sp.Boolean()
    x_warped = space.warp(x)
    assert x_warped.shape == x.shape + (1,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


@given(
    gufunc(
        "(n),(m)->(n)",
        dtype=[np.int_, CAT_DTYPE],
        elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE))],
        unique=[False, True],
        min_side={"m": 2},
    )
)
def test_cat_warp_unwarp(args):
    """Round trip x -> warp -> unwarp for a `sp.Categorical` space."""
    x, values = args

    n_values = len(values)
    assert len(set(values)) >= 2

    # Turn the random integers into random picks from the allowed values
    x = values[x % n_values]
    assert x.ndim == 1

    space = sp.Categorical(values=values)
    x_warped = space.warp(x)
    # The warped space has one column per category
    assert x_warped.shape == x.shape + (n_values,)
    assert x_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = space.get_bounds().T
    assert np.all(lower <= x_warped)
    assert np.all(x_warped <= upper)

    x_warped_valid = space.validate_warped(x_warped)
    assert close_enough(x_warped, x_warped_valid)

    x_back = space.unwarp(x_warped)
    assert x_back.shape == x.shape
    x_back_valid = space.validate(x_back)
    assert close_enough(x_back, x_back_valid)

    assert close_enough(x, x_back)


@given(space_configs())
def test_joint_space_unwarp_warp(args):
    """Round trip X -> warp -> unwarp through a `sp.JointSpace` built from a random config."""
    meta, X, _, _ = args

    joint = sp.JointSpace(meta)
    joint.validate(X)

    X_warped = joint.warp(X)
    assert X_warped.dtype == sp.WARPED_DTYPE

    # Test bounds
    lower, upper = joint.get_bounds().T
    assert np.all(lower <= X_warped)
    assert np.all(X_warped <= upper)

    X_back = joint.unwarp(X_warped)

    # Every variable of every point must survive the round trip
    for ii in range(len(X)):
        for vv in X[ii]:
            assert close_enough(X[ii][vv], X_back[ii][vv])
    joint.validate(X_back)


@given(space_configs())
def test_joint_space_warp_missing(args):
    """Warp a point that sets only some variables: missing ones come out as nan, set ones round trip."""
    meta, X, _, fixed_vars = args

    joint = sp.JointSpace(meta)

    warped = joint.warp([fixed_vars])
    assert warped.dtype == sp.WARPED_DTYPE

    # Test bounds, which only need to hold where the warped value is not nan
    lower, upper = joint.get_bounds().T
    assert np.all((lower <= warped) | np.isnan(warped))
    assert np.all((warped <= upper) | np.isnan(warped))

    per_param_blocks = np.hsplit(warped, joint.blocks[:-1])
    for param, block in zip(joint.param_list, per_param_blocks):
        (block,) = block  # only one point was warped, so each block has one row
        if param not in fixed_vars:
            assert np.all(np.isnan(block))
            continue

        x_orig = joint.spaces[param].unwarp(block).item()
        joint.spaces[param].validate(x_orig)
        assert close_enough(x_orig, fixed_vars[param])

        # check other direction
        block_again = joint.spaces[param].warp(fixed_vars[param])
        assert close_enough(block, block_again)


@given(space_configs())
def test_joint_space_warp_fixed_vars(args):
    """Check that unwarping with ``fixed_vals`` reproduces the fixed variables exactly.

    The points in `X` are first overwritten with the fixed values, warped, and then unwarped with the same
    fixed values passed in; the fixed variables must come back equal, not just close.
    """
    meta, X, _, fixed_vars = args

    # set X vals equal to fixed_vars
    for xx in X:
        for param in fixed_vars:
            xx[param] = fixed_vars[param]

    S = sp.JointSpace(meta)

    X_w = S.warp(X)
    assert X_w.dtype == sp.WARPED_DTYPE

    # Test bounds (computed once; an earlier duplicate of this call was overwritten before use)
    lower, upper = S.get_bounds().T
    assert np.all(lower <= X_w)
    assert np.all(X_w <= upper)

    X2 = S.unwarp(X_w, fixed_vals=fixed_vars)

    # Make sure we get == not just close in unwarp for fixed vars
    for xx in X2:
        for param in fixed_vars:
            assert xx[param] == fixed_vars[param]


@given(space_configs(), integers(min_value=0, max_value=10))
def test_joint_grid(args, max_interp):
    """Check the per-variable grids from `sp.JointSpace.grid` for type, ordering, end points, and size."""
    meta, _, _, _ = args

    native_types = (bool, int, float, CAT_NATIVE_DTYPE)

    joint = sp.JointSpace(meta)

    lower, upper = joint.get_bounds().T

    all_grids = joint.grid(max_interp=max_interp)
    assert sorted(all_grids.keys()) == sorted(meta.keys())

    for var, grid in all_grids.items():
        space = joint.spaces[var]

        # Make sure same as calling direct
        grid_direct = space.grid(max_interp)
        assert grid == grid_direct

        if len(grid) == 0:
            assert grid == []
            # An empty grid needs either no interpolation points or an empty list of values
            if space.values is None:
                assert max_interp == 0
            else:
                assert len(space.values) == 0
            continue

        # Make sure native type, and the same type for the whole grid
        assert all(type(xx) in native_types for xx in grid)
        first_type = type(grid[0])
        assert all(type(xx) == first_type for xx in grid)

        # Sorted with no repeats
        assert np.all(np.array(grid) == np.unique(grid))

        if max_interp >= 2:
            assert space.lower is None or close_enough(space.lower, grid[0])
            assert space.upper is None or close_enough(space.upper, grid[-1])

        if space.values is None:
            assert len(grid) <= max_interp
            # Could also check approx linear in warped space, but good enough
            # for now.
        else:
            assert np.all(space.values == grid)


def test_unravel_index_empty():
    """`sp.unravel_index` on empty dims gives an empty tuple."""
    no_dims = ()
    assert sp.unravel_index(no_dims) == ()


@given(lists(integers(0, 2), min_size=1))
def test_unravel_index_empty_2(dims):
    """`sp.unravel_index` gives an empty tuple whenever at least one dimension has size zero."""
    all_nonzero = np.prod(dims) > 0
    if all_nonzero:
        # Force a zero sized dimension so the overall size is zero
        dims[0] = 0
    shape = tuple(dims)

    assert sp.unravel_index(shape) == ()


================================================
FILE: test/stats_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import scipy.stats as sst
from hypothesis import assume, given
from hypothesis.strategies import integers, lists, sampled_from
from hypothesis_gufunc.gufunc import gufunc_args
from sklearn.preprocessing import robust_scale

from bayesmark import stats
from hypothesis_util import close_enough, mfloats, probs, seeds


def t_test_(x):
    """Perform a standard t-test to test if the values in `x` are sampled from
    a distribution with a zero mean.

    Parameters
    ----------
    x : array-like, shape (n_samples,)
        array of data points to test. Must not contain nan.

    Returns
    -------
    pval : float
        p-value (in [0,1]) from t-test on `x`. This is 1.0 when `x` has fewer
        than two points or contains non-finite values.
    """
    assert np.ndim(x) == 1 and (not np.any(np.isnan(x)))

    if (len(x) <= 1) or (not np.all(np.isfinite(x))):
        return 1.0  # Can't say anything about scale => p=1

    _, pval = sst.ttest_1samp(x, 0.0)
    if np.isnan(pval):
        # Should only be possible if scale underflowed to zero:
        assert np.var(x, ddof=1) <= 1e-100
        # It is debatable if the condition should be ``np.mean(x) == 0.0`` or
        # ``np.all(x == 0.0)``. Should not matter in practice.
        # Use the builtin float: the ``np.float`` alias was removed in NumPy 1.24.
        pval = float(np.mean(x) == 0.0)
    assert 0.0 <= pval and pval <= 1.0
    return pval


@given(gufunc_args("(n),()->(n)", dtype=np.float64, elements=[mfloats(), probs()], min_side=2))
def test_robust_standardize_to_sklearn(args):
    """Compare `stats.robust_standardize` against sklearn's `robust_scale` with a matching quantile range.

    The dtype is spelled ``np.float64`` because the equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    X, q_level = args

    # Symmetric quantiles around the median that are q_level apart
    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    assert close_enough(q1 - q0, q_level)

    X_bo = stats.robust_standardize(X, q_level=q_level)

    # sklearn wants a 2D array and takes its quantile range in percent
    X = X[:, None]
    X_skl = robust_scale(X, axis=0, with_centering=True, with_scaling=True, quantile_range=[100.0 * q0, 100.0 * q1])
    # Rescale the sklearn result by the distance between the same quantiles of a standard normal
    X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0))

    assert close_enough(X_bo, X_skl, equal_nan=True)


def test_robust_standardize_broadcast():
    """Need to do things different here since standardize broadcasts over the
    wrong dimension (0 instead of -1).

    The dtype is spelled ``np.float64`` because the equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    # Build vectorize version, this is just loop inside.
    f_vec = np.vectorize(stats.robust_standardize, signature="(n),()->(n)", otypes=["float64"])

    @given(gufunc_args("(n,m),()->(n,m)", dtype=np.float64, min_side={"n": 2}, elements=[mfloats(), probs()]))
    def test_f(args):
        X, q_level = args

        R1 = stats.robust_standardize(X, q_level)
        # The vectorized version loops over the last axis, so transpose in and out
        R2 = f_vec(X.T, q_level).T
        assert R1.dtype == "float64"
        assert R2.dtype == "float64"
        assert close_enough(R1, R2, equal_nan=True)

    # Call the test
    test_f()


@given(integers(0, 10), mfloats(), probs())
def test_t_EB_zero_var(N, val, alpha):
    """Constant data gives a zero width error bar, or an infinite one with fewer than two points."""
    constant_data = val + np.zeros(N)
    err_bar = stats.t_EB(constant_data, alpha=alpha)
    if N > 1:
        assert np.allclose(err_bar, 0.0)
    else:
        assert err_bar == np.inf


@given(integers(1, 10), sampled_from([np.inf, -np.inf]), probs())
def test_t_EB_inf(N, val, alpha):
    """Data with an infinite entry gives a nan error bar, or an infinite one for a single point."""
    data = np.zeros(N)
    data[0] = val

    err_bar = stats.t_EB(data, alpha=alpha)
    if N > 1:
        assert np.isnan(err_bar)
    else:
        assert err_bar == np.inf


@given(seeds(), probs(), integers(2, 10))
def test_t_EB_coverage(seed, alpha, N):
    """Check that the `stats.t_EB` interval misses the true mean at a rate consistent with `alpha`.

    Draws 100 standard normal samples of size `N`, counts how often the interval around the sample mean
    excludes the true mean of zero, and runs a binomial test of that count against `alpha`.
    """
    trials = 100

    random_st = np.random.RandomState(seed)

    fail = 0
    for tt in range(trials):
        x = random_st.randn(N)

        EB = stats.t_EB(x, alpha=alpha)
        mu = np.nanmean(x)
        LB, UB = mu - EB, mu + EB
        assert np.isfinite(LB) and np.isfinite(UB)
        # A failure is an interval that does not contain the true mean of zero
        fail += (0.0 < LB) or (UB < 0.0)

    # ``binom_test`` was removed in SciPy 1.12; prefer its replacement when it is available
    if hasattr(sst, "binomtest"):
        pval = sst.binomtest(int(fail), trials, alpha).pvalue
    else:
        pval = sst.binom_test(fail, trials, alpha)

    assert pval >= 0.05 / 100  # Assume we run 100 times


@given(lists(mfloats(), min_size=2))
def test_t_test_to_EB(x):
    """With alpha set to the t-test p-value, the `stats.t_EB` error bar matches the absolute sample mean."""
    pval = t_test_(x)
    # Skip the degenerate p-values at the ends of the unit interval
    assume(0.0 < pval < 1.0)

    err_bar = stats.t_EB(x, alpha=pval)
    abs_mean = np.abs(np.mean(x))
    assert np.allclose(abs_mean, err_bar)


================================================
FILE: test/util.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hypothesis import assume
from hypothesis.extra.numpy import arrays
from hypothesis.strategies import (
    binary,
    booleans,
    composite,
    dictionaries,
    floats,
    from_regex,
    frozensets,
    integers,
    lists,
    sampled_from,
    text,
)
from hypothesis_gufunc.extra.xr import fixed_dataarrays, simple_coords, xr_coords

import bayesmark.space as sp
from bayesmark.constants import ARG_DELIM, ITER, METHOD, RANDOM_SEARCH, SUGGEST, TEST_CASE, TRIAL
from bayesmark.np_util import linear_rescale

# The null character, which is filtered out of generated text since it confuses numpy arrays
NULL_PLUG = "\x00"


def _easy_text():
    """Build a text strategy that never produces strings containing `NULL_PLUG`."""
    # The NULL_PLUG confuses numpy arrays, so filter out any text that has it
    return text().filter(lambda ss: NULL_PLUG not in ss)


def _hashable():
    """Build a strategy that produces floats, integers, or text without `NULL_PLUG`."""
    return floats() | integers() | _easy_text()


# Strategy factory for categorical values: text when the categorical kind is unicode, otherwise binary
CAT_STGY = _easy_text if sp.CAT_KIND == "U" else binary

# The next float after zero in the positive direction
F_MIN = np.nextafter(0, 1)

# (min, max) limits used when drawing values for each warp
RANGES = {
    "linear": (-1000, 1000),
    "log": (F_MIN, 1000),
    "logit": (F_MIN, np.nextafter(1, 0)),
    "bilog": (-100, 100),
}

# Names of all the space types, in sorted order
SPACES = tuple(sorted(sp.SPACE_DICT.keys()))


@composite
def space_vars(draw, max_values=5):
    """Build composite strategy for the config dict of a single random variable.

    The result has a ``"type"`` key drawn from `SPACES` and, depending on the type, a ``"space"`` key with
    the warp and either a ``"values"`` list (at most `max_values` long) or a ``"range"`` tuple.
    """
    type_ = draw(sampled_from(SPACES))
    use_values = draw(booleans())

    if type_ in ("real", "int"):
        if type_ == "real":
            warp = draw(sampled_from(("linear", "log", "logit", "bilog")))
            min_val, max_val = RANGES[warp]
            elements = floats(min_val, max_val)
        else:
            warp = draw(sampled_from(("linear", "log", "bilog")))
            min_val, max_val = RANGES[warp]
            # Must shrink these to next integers in range to keep hypothesis happy
            elements = integers(int(np.ceil(min_val)), int(np.floor(max_val)))

        if use_values:
            # Generating unique values to ensure that we always have at least 2
            # distinct values, but the code is designed to accept non-unique
            # values as long as at least 2 are distinct. Could generalize this.
            values = draw(lists(elements, min_size=2, max_size=max_values, unique=True))
            D = {"type": type_, "space": warp, "values": values}
        else:
            end_points = draw(lists(elements, min_size=2, max_size=2, unique=True))
            range_ = tuple(sorted(end_points))
            D = {"type": type_, "space": warp, "range": range_}
    elif type_ == "bool":
        D = {"type": type_}
    elif type_ in ("cat", "ordinal"):
        values = draw(lists(CAT_STGY(), min_size=2, max_size=max_values, unique=True))
        # This assume is needed because np.unique has bug for null plug
        # .. >>> np.unique([u'', u'\x00'])
        # .. array([u''], dtype='<U1')
        assume(len(np.unique(values)) == len(values))
        D = {"type": type_, "values": values}
    else:
        assert False

    return D


@composite
def space_configs(draw, max_vars=5, max_len=5, allow_missing=False, unique_y=False):
    """Build composite strategy for a random joint space config with matching data.

    Returns a tuple ``(meta, X, y, X_fixed)``: the config dict with up to `max_vars` variables, up to
    `max_len` points unwarped from the space, a float array with one output per point (nan allowed only if
    `allow_missing`, unique only if `unique_y`), and a dict of values for a random subset of the variables.
    """
    meta = draw(dictionaries(text(), space_vars(), min_size=1, max_size=max_vars))

    joint = sp.JointSpace(meta)
    lower, upper = joint.get_bounds().T

    # Size of the warped space: one column per value for cat and ordinal, otherwise one column
    n_warped = 0
    for var in meta.values():
        n_warped += len(var["values"]) if var["type"] in ("cat", "ordinal") else 1

    unit_floats = floats(min_value=0.0, max_value=1.0)

    # Let's draw warped variable because that will be a lot easier
    n_points = draw(integers(min_value=0, max_value=max_len))
    X_w = draw(arrays(dtype=float, shape=(n_points, n_warped), elements=unit_floats))
    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    X = joint.unwarp(X_w)

    # Draw output too in case we want it
    y = draw(
        arrays(
            dtype=float,
            shape=(n_points,),
            elements=floats(allow_infinity=False, allow_nan=allow_missing),
            unique=unique_y,
        )
    )

    # Draw the fixed vars
    X_fixed_w = draw(arrays(dtype=float, shape=(1, n_warped), elements=unit_floats))
    X_fixed_w = linear_rescale(X_fixed_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    (X_fixed,) = joint.unwarp(X_fixed_w)

    # Make fixed_vars a subset of all vars.
    keep_in_fixed = draw(frozensets(sampled_from(tuple(X_fixed.keys()))))
    X_fixed = {k: X_fixed[k] for k in keep_in_fixed}

    return meta, X, y, X_fixed


# Strategy factory for the coordinate elements used in perf_dataarrays
_test_cases = _easy_text


def perf_dataarrays(min_trial=1):
    """Build a strategy for float data arrays over the (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) dims.

    The method coordinate always has at least one entry that starts with the random search prefix, and the
    trial dimension has at least `min_trial` entries. The dtype is spelled ``np.float64`` because the
    equivalent ``np.float_`` alias was removed in NumPy 2.0.
    """
    dims = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    # Don't get too close to infinity because that can also create issues and isn't supported
    elements = floats(allow_nan=False, min_value=-1e300, max_value=1e300)

    ref = RANDOM_SEARCH + ARG_DELIM
    method_names = from_regex("^%s[A-Z]*" % ref) | text()
    # Only keep method coords where some method starts with the random search prefix
    method_st = xr_coords(elements=method_names).filter(lambda L: any(ss.startswith(ref) for ss in L))

    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TRIAL: simple_coords(min_side=min_trial),
        METHOD: method_st,
    }
    S = fixed_dataarrays(dims, dtype=np.float64, elements=elements, coords_elements=_test_cases(), coords_st=coords_st)
    return S


================================================
FILE: test/util_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random as pyrandom
import shlex

from hypothesis import assume, given
from hypothesis.strategies import floats, integers, iterables, lists, text

from bayesmark import util as bobm_util
from util import _hashable


def some_mock_f(x):
    """Arbitrary test function: a standard normal draw from a generator seeded by the hash of `x`."""
    return pyrandom.Random(hash(x)).gauss(0, 1)


@given(_hashable(), lists(_hashable()))
def test_in_or_none(x, L):
    """`in_or_none` agrees with the ``in`` operator on both the list and its set."""
    result = bobm_util.in_or_none(x, L)
    assert isinstance(result, bool)

    in_list = x in L
    assert result == in_list
    in_set = x in set(L)
    assert result == in_set


@given(_hashable())
def test_in_or_none_on_none(x):
    """`in_or_none` is true for anything when the container is None."""
    result = bobm_util.in_or_none(x, None)
    assert isinstance(result, bool)
    assert result


@given(lists(_hashable()))
def test_in_or_none_self(L):
    """`in_or_none` is true for every element of the list it is checked against."""
    for element in L:
        result = bobm_util.in_or_none(element, L)
        assert isinstance(result, bool)
        assert result


@given(lists(_hashable()))
def test_all_unique(L):
    """Smoke test: `all_unique` runs without error on lists of hashable items."""
    bobm_util.all_unique(L)


@given(lists(integers(), unique=True) | lists(text(), unique=True) | lists(floats(), unique=True))
def test_strict_sorted(L):
    """Smoke test: `strict_sorted` runs without error on unique lists of a single type."""
    bobm_util.strict_sorted(L)


@given(integers(-5, 1000))
def test_range_str(stop):
    """Smoke test: the output of `range_str` can be fully consumed, including for negative `stop`."""
    items = bobm_util.range_str(stop)
    list(items)


@given(text(), lists(text()))
def test_str_join_safe(delim, str_vec):
    """Smoke test: `str_join_safe` runs when no string contains the delimiter."""
    assume(all(delim not in ss for ss in str_vec))
    bobm_util.str_join_safe(delim, str_vec, append=False)


@given(text(), lists(text()), lists(text()))
def test_str_join_safe_append(delim, str_vec0, str_vec):
    """Smoke test: `str_join_safe` can append to a string that it built earlier."""
    assume(all(delim not in ss for ss in str_vec0))
    assume(all(delim not in ss for ss in str_vec))

    joined = bobm_util.str_join_safe(delim, str_vec0, append=False)
    # The already joined string goes first and the new pieces are appended to it
    bobm_util.str_join_safe(delim, [joined] + str_vec, append=True)


@given(lists(text()))
def test_shell_join(argv):
    """The output of `shell_join` splits back into the original arguments with `shlex.split`."""
    command = bobm_util.shell_join(argv, delim=" ")

    recovered = shlex.split(command)
    assert recovered == list(argv)


@given(text(), text(min_size=1))
def test_chomp(str_val, ext):
    """Smoke test: `chomp` runs on a string that ends with the given non-empty extension."""
    with_ext = str_val + ext
    bobm_util.chomp(with_ext, ext)


@given(iterables(_hashable()))
def test_preimage_func(x):
    """Smoke test: `preimage_func` runs with the mock function on an iterable of hashable items."""
    bobm_util.preimage_func(some_mock_f, x)


================================================
FILE: test/xr_util_test.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from itertools import product

import xarray as xr
from hypothesis import assume, given, settings
from hypothesis.strategies import dictionaries, floats, integers, just, sampled_from, tuples
from hypothesis_gufunc.extra.xr import (
    _hashable,
    dataarrays,
    datasets,
    fixed_datasets,
    simple_dataarrays,
    subset_lists,
    vars_to_dims_dicts,
    xr_vars,
)

import bayesmark.xr_util as xru

# Strategy factory for the fill values passed to ds_like and ds_like_mixed in the tests below
xr_fill = _hashable


def intersect_seq(L):
    """Return the set of items common to every sequence in `L`, or an empty set if `L` is empty."""
    if len(L) == 0:
        return set()

    # Start from the first sequence and keep only what is in all the others
    return set(L[0]).intersection(*L[1:])


def ds_vars_dims():
    """Build a strategy for tuples of a dataset, a subset of its variables, and a subset of its dims."""

    def build_it(vars_to_dims_):
        # Collect the dims of every variable and then drop the repeats
        flat_dims = []
        for dd in vars_to_dims_.values():
            flat_dims.extend(dd)
        all_dims = list(set(flat_dims))

        return tuples(
            fixed_datasets(vars_to_dims_),
            subset_lists(list(vars_to_dims_.keys())),
            subset_lists(all_dims),
        )

    # The dataset depends on the drawn variable to dims mapping, hence the flatmap
    return vars_to_dims_dicts().flatmap(build_it)


def ds_vars_dims_mixed():
    """Build a strategy for tuples of a dataset, a list of (variable, dims subset) pairs, and all dims."""

    def build_it(vars_to_dims_):
        # Collect the dims of every variable and then drop the repeats
        flat_dims = []
        for dd in vars_to_dims_.values():
            flat_dims.extend(dd)
        all_dims = list(set(flat_dims))

        ds = fixed_datasets(vars_to_dims_)

        # Map each chosen variable to its own subset of the dims, then flatten to a list of pairs
        var_names = sampled_from(list(vars_to_dims_.keys()))
        vars_dict = dictionaries(var_names, subset_lists(all_dims), dict_class=OrderedDict)
        var_dim_pairs = vars_dict.map(lambda od: list(od.items()))

        return tuples(ds, var_dim_pairs, just(all_dims))

    # The dataset depends on the drawn variable to dims mapping, hence the flatmap
    return vars_to_dims_dicts(min_vars=0, min_dims=0).flatmap(build_it)


@given(simple_dataarrays(("foo", "bar", "baz")) | dataarrays() | dataarrays(coords_elements=floats()), integers(0, 3))
def test_is_simple_coords(da, min_side):
    """Smoke test: `is_simple_coords` runs on both simple and general data arrays."""
    coords = da.coords
    xru.is_simple_coords(coords, min_side=min_side)


@given(simple_dataarrays(("foo", "bar", "baz")))
def test_is_simple_coords_pass(da):
    """Data arrays from `simple_dataarrays` are reported as having simple coords."""
    assert xru.is_simple_coords(da.coords)


@given(ds_vars_dims(), xr_fill())
def test_ds_like(args, fill):
    """Smoke test: `ds_like` runs for a random reference dataset, variables, dims, and fill value."""
    reference, var_names, dim_names = args

    xru.ds_like(reference, var_names, dim_names, fill=fill)


@given(ds_vars_dims_mixed(), xr_fill())
def test_ds_like_mixed(args, fill):
    """Smoke test: `ds_like_mixed` runs for a random reference dataset, variable dims pairs, and fill."""
    reference, var_dim_pairs, dim_names = args

    xru.ds_like_mixed(reference, var_dim_pairs, dim_names, fill=fill)


@given(xr_vars(), dataarrays())
def test_only_dataarray(var_, da):
    """Smoke test: `only_dataarray` runs on a dataset that holds a single data array."""
    # Skip cases where the variable name is also one of the array's dims
    assume(var_ not in da.dims)

    single_var_ds = xr.Dataset({var_: da})

    xru.only_dataarray(single_var_ds)


@given(datasets())
def test_coord_compat(ds):
    """The variables of a single dataset are coord compatible on the dims that they share."""
    da_seq = [ds[kk] for kk in ds]
    shared_dims = sorted(intersect_seq([da.dims for da in da_seq]))

    assert xru.coord_compat(da_seq, shared_dims)


@given(datasets())
def test_coord_compat_false(ds):
    """Smoke test: `coord_compat` runs after the coords of the first variable have been replaced."""
    da_seq = [ds[kk] for kk in ds]
    shared_dims = sorted(intersect_seq([da.dims for da in da_seq]))

    # Need at least one variable, and its first entry needs at least one dim to overwrite
    assume(len(da_seq) > 0)
    assume(len(da_seq[0].dims) > 0)

    first = da_seq[0]
    dim = first.dims[0]
    # Swap the coords on the first dim of the first variable for a plain integer range
    da_seq[0] = first.assign_coords(**{dim: range(first.sizes[dim])})

    xru.coord_compat(da_seq, shared_dims)


@given(dataarrays(min_dims=1, max_dims=1))
def test_da_to_string(da):
    """Smoke test: `da_to_string` runs on one dimensional data arrays."""
    xru.da_to_string(da)


@given(dataarrays(min_side=0, min_dims=0), integers(1, 3))
@settings(deadline=None)
def test_da_concat(da, n):
    """Smoke test: `da_concat` runs on the pieces of a data array that was split on its last `n` dims."""
    # At least one dim has to be left over after the split
    assume(n < len(da.dims))

    pieces, split_dims = da_split(da, n)
    assume(len(pieces) > 0)
    assert len(split_dims) == n

    xru.da_concat(pieces, dims=split_dims)


def da_split(da, n):
    """Split `da` along its last `n` dims.

    Returns a dict that maps each tuple of coordinate values on those dims to the selected slice (with those
    dims dropped), together with the tuple of dim names that were split on.
    """
    assert 0 < n <= len(da.dims)

    keys_to_slice = da.dims[-n:]
    coord_values = [da.coords[kk].values.tolist() for kk in keys_to_slice]
    # One slice per combination of coordinate values on the split dims
    da_dict = {
        tuple(combo): da.sel(dict(zip(keys_to_slice, combo)), drop=True) for combo in product(*coord_values)
    }
    return da_dict, keys_to_slice


@given(datasets(min_side=1, min_dims=1), integers(1, 3))
@settings(deadline=None)
def test_ds_concat(ds, n):
    """Smoke test: `ds_concat` runs on the pieces of a dataset split on the first `n` of its shared dims."""
    shared_dims = sorted(intersect_seq([ds[kk].dims for kk in ds]))

    # Always leave at least one shared dim that is not split on
    n = min(n, len(shared_dims) - 1)
    assume(0 < n)

    split_dims = shared_dims[:n]
    coord_values = [ds.coords[kk].values.tolist() for kk in split_dims]
    # One piece per combination of coordinate values on the split dims
    pieces = {combo: ds.sel(dict(zip(split_dims, combo)), drop=True) for combo in product(*coord_values)}

    xru.ds_concat(pieces, dims=split_dims)


================================================
FILE: test.sh
================================================
#!/bin/bash

set -ex
set -o pipefail

# Set conda paths
export CONDA_PATH=./tmp/conda
export CONDA_ENVS=env

# Sometimes the pip setting PIP_REQUIRE_VIRTUALENV has issues with conda
export PIP_REQUIRE_VIRTUALENV=false

PY_VERSIONS=( "3.6" "3.7" )

# Handy to know what we are working with
git --version

# Cleanup workspace, src for any old -e installs
git clean -x -f -d
rm -rf src/

# Install miniconda
if command -v conda 2>/dev/null; then
    echo "Conda already installed"
else
    # We need to use miniconda since we can't figure out ho to install py3.6 in
    # this env image. We could also use Miniconda3-latest-Linux-x86_64.sh but
    # pinning version to make reprodicible.
    echo "Installing miniconda"
    if [[ "$OSTYPE" == "darwin"* ]]; then
        # In future let's also try, for reprodicibility:
        # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-MacOSX-x86_64.sh;
        curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh;
    else
        # In future let's also try, for reprodicibility:
        # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh;
        curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh;
    fi
    chmod +x ./miniconda.sh
    ./miniconda.sh -b -p $CONDA_PATH
    rm ./miniconda.sh
fi
export PATH=$CONDA_PATH/bin:$PATH

# Setup env just for installing pre-commit to run hooks on all files
rm -rf "$CONDA_ENVS"
ENV_PATH="${CONDA_ENVS}/bobm_commit_hooks"
conda create -y -q -p $ENV_PATH python=3.6
echo $ENV_PATH
source activate $ENV_PATH
python --version
pip freeze | sort
# not listing 2nd order deps here, but probably ok
pip install -r requirements/tools.txt
# Now run hooks on all files, don't need to install hooks since run directly
pre-commit run --all-files
# Now can leave env with  pre-commit
conda deactivate
# Also check no changes to files by hooks
test -z "$(git diff)"
# clean up for good measure, but need to keep miniconda tmp folder
git clean -x -f -d --exclude=tmp

# Helpers to compare the package lists found in pip requirement files.
# On mac, sed -r needs to be sed -E

# nameonly: filter stdin down to lines starting with a letter or digit, strip
# everything from "==" onwards, map "_" to "-", and sort case-insensitively.
nameonly () {
    grep -i '^[a-z0-9]' | sed -E "s/([^=]*)==.*/\1/g" | tr _ - | sort -f
}

# nameveronly: same filtering as nameonly, but keep the first whitespace
# delimited field of each line (i.e. "name==version").
nameveronly () {
    grep -i '^[a-z0-9]' | awk '{print $1}' | tr _ - | sort -f
}

# pipcheck FILE...: write the pins requested by the given requirement files to
# ask.log and the output of `pip freeze` to got.log (both passed through
# requirements/pipreqs_edits.sed), then diff them case-insensitively.
# "$@" is quoted so that each file path is passed to cat as a single argument.
pipcheck () {
    cat "$@" | grep -i '^[a-z0-9]' | awk '{print $1}' | sed -f requirements/pipreqs_edits.sed | sort -f | uniq >ask.log &&
        pip freeze | sed -f requirements/pipreqs_edits.sed | sort -f >got.log &&
        diff -i ask.log got.log
}

# Now test the deps
ENV_PATH="${CONDA_ENVS}/deps_test"
conda create -y -q -p $ENV_PATH python=3.6
echo $ENV_PATH
source activate $ENV_PATH
python --version
pip freeze | sort

# Install all requirements, make sure they are mutually compatible
# After each install step, pipcheck diffs the pins requested so far against
# what `pip freeze` reports as installed
pip install -r requirements/base.txt
pipcheck requirements/base.txt

# Install package
python setup.py install
pipcheck requirements/base.txt requirements/self.txt

pip install -r requirements/optimizers.txt
pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt

pip install -r requirements/test.txt
pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt requirements/test.txt

pip install -r requirements/ipynb.txt
pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt
pip install -r requirements/docs.txt
pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt requirements/docs.txt

# The tools requirements are installed last and are not pipcheck'd
pip install -r requirements/tools.txt

# Make sure .in file corresponds to what is imported
# For each group: ask.log holds the package names from the .in file, got.log
# holds the names pipreqs wrote to requirement_chk.in for the matching
# directory (after the pipreqs_edits.sed rewrites); any difference fails
nameonly <requirements/base.in >ask.log
pipreqs bayesmark/  --ignore bayesmark/builtin_opt/ --savepath requirement_chk.in
sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
diff ask.log got.log

nameonly <requirements/test.in >ask.log
pipreqs test/ --savepath requirement_chk.in
sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
diff ask.log got.log

nameonly <requirements/optimizers.in >ask.log
pipreqs bayesmark/builtin_opt/ --savepath requirement_chk.in
sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
diff ask.log got.log

nameonly <requirements/docs.in >ask.log
pipreqs docs/ --savepath requirement_chk.in
sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
diff ask.log got.log

nameonly <requirements/ipynb.in >ask.log
# Convert the notebooks to scripts so that pipreqs can be run on notebooks/
jupyter nbconvert --to script notebooks/*.ipynb
pipreqs notebooks/ --savepath requirement_chk.in
sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log
diff ask.log got.log

# Make sure txt file corresponds to pip compile
# First copy the originals
for f in requirements/*.txt; do cp -- "$f" "${f%.txt}.chk"; done
# Now re-compile
# no-upgrade means that by default it keeps the 2nd order dependency versions already in the requirements txt file
# (otherwise it brings it to the very latest available version which often causes issues).
pip-compile-multi -o txt --no-upgrade

# Compare each saved original (.chk) against the re-compiled .txt file
# NOTE(review): for base and test, ask.log is not passed through
# pipreqs_edits.sed while got.log is; the remaining files filter both sides.
# Presumably the sed edits do not touch anything in base/test -- confirm.
nameveronly <requirements/base.chk >ask.log
sed -f requirements/pipreqs_edits.sed requirements/base.txt | nameveronly >got.log
diff ask.log got.log

nameveronly <requirements/test.chk >ask.log
sed -f requirements/pipreqs_edits.sed requirements/test.txt | nameveronly >got.log
diff ask.log got.log

nameveronly <requirements/optimizers.chk | sed -f requirements/pipreqs_edits.sed >ask.log
sed -f requirements/pipreqs_edits.sed requirements/optimizers.txt | nameveronly >got.log
diff ask.log got.log

nameveronly <requirements/ipynb.chk | sed -f requirements/pipreqs_edits.sed >ask.log
sed -f requirements/pipreqs_edits.sed requirements/ipynb.txt | nameveronly >got.log
diff ask.log got.log

nameveronly <requirements/docs.chk | sed -f requirements/pipreqs_edits.sed >ask.log
sed -f requirements/pipreqs_edits.sed requirements/docs.txt | nameveronly >got.log
diff ask.log got.log

nameveronly <requirements/tools.chk | sed -f requirements/pipreqs_edits.sed >ask.log
sed -f requirements/pipreqs_edits.sed requirements/tools.txt | nameveronly >got.log
diff ask.log got.log

# Deactivate virtual environment
conda deactivate

# Run the unit tests once per Python version in PY_VERSIONS, starting from an
# empty environment directory
rm -rf "$CONDA_ENVS"
for py_version in "${PY_VERSIONS[@]}"; do
    # The same env path is used for every version
    ENV_PATH="${CONDA_ENVS}/unit_test"
    conda create -y -q -p "$ENV_PATH" "python=$py_version"
    echo "$ENV_PATH"
    source activate "$ENV_PATH"
    python --version
    pip freeze | sort

    # Pinned test requirements first, then the package itself
    pip install -r requirements/test.txt
    python setup.py install

    # Unit tests with a fixed hypothesis seed and an HTML coverage report
    pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html

    conda deactivate
done


================================================
FILE: tools/archive_branch.sh
================================================
#!/bin/bash
#
# Archive a branch: tag its tip as archive/<date>-<branch>, push the tag to
# origin, then delete the branch both locally and on origin.
#
# Usage: archive_branch.sh BRANCH

set -ex
set -o pipefail

# Abort with a usage message when no branch name is given, instead of running
# the git commands below with an empty argument
BRANCH="${1:?usage: archive_branch.sh BRANCH}"

DATE=$(date +"%Y%m%d")
TAGNAME="archive/$DATE-$BRANCH"

# Fail if there are uncommitted changes or untracked files
test -z "$(git status --porcelain)"

# Fail if origin and local differ
git diff "$BRANCH" "origin/$BRANCH" --quiet

# Prune remotes for good measure
git remote prune origin

# Tag the branch tip, then move off the branch so it can be deleted later
git checkout "$BRANCH"
git tag -a "$TAGNAME" -m "archived branch $BRANCH on $DATE"
git checkout master
git push origin "$TAGNAME"

# Make sure we tagged correctly for good measure: the tag must resolve to the
# branch tip locally, and that commit must show up among the tags on origin
diff <(git rev-list "$TAGNAME" -n 1) <(git rev-parse "$BRANCH")
git ls-remote --tags origin | grep "$(git rev-parse "$BRANCH")"

# Only now remove the branch, locally and on origin
git branch -D "$BRANCH"
git push origin --delete "$BRANCH"

echo "cleaned up"


================================================
FILE: tools/deploy.sh
================================================
#!/bin/bash
#
# Interactive release script: tests a branch, squash-merges it into master,
# uploads the build to test PyPI and then PyPI, and tags the release.
#
# Positional arguments: REMOTE BRANCH PACKAGE VERSION
#
# Note that
# UUID=$(uuidgen)
# works on Mac OS by default, but requires installation on linux.

# Echo every command and stop at the first failure, including inside pipelines
set -ex
set -o pipefail

# Script arguments
REMOTE=$1
BRANCH=$2
PACKAGE=$3
VERSION=$4

# Check to make sure we have keys setup right before we start
git push --dry-run

# Check versions are there, this is a crude way to do it but it works
# (each grep fails the script if VERSION is not found in that file)
grep "^$PACKAGE==$VERSION\$" requirements/self.txt
grep '^__version__ = "'$VERSION'"$' bayesmark/__init__.py
grep 'version="'$VERSION'",$' setup.py

# Where envs go
ENVS=~/envs
# Which python version this uses
PY=python3.7
# Which env contains twine and py version we use
TWINE_ENV=twine_env
# Where to run tar ball tests from
TEST_DIR=~/tmp/deploy_tests

mkdir -p $TEST_DIR

# Get the dir
REPO_DIR=$(pwd)
git checkout $BRANCH

# Fail if untracked files and clean
test -z "$(git status --porcelain)"
git clean -x -ff -d

# Run tests locally and cleanup
./integration_test_with_setup.sh
./test.sh
git reset --hard HEAD
git clean -x -ff -d
test -z "$(git status --porcelain)"

# push to remote and check
git push -u $REMOTE $BRANCH
git diff $BRANCH $REMOTE/$BRANCH --quiet

# See if tests pass remote, TODO use travis CLI
# The first read waits up to 1s and throws away whatever it gets, presumably
# to drain input typed ahead of the prompt
read -t 1 -n 10000 discard || true
read -p "Travis tests pass [y/n]? " -r
if [[ ! $REPLY =~ ^[Yy]$ ]]
then
    exit 1
fi

# test tar ball
# Build and check the distribution inside the twine env
source $ENVS/$TWINE_ENV/bin/activate
./build_wheel.sh
twine check dist/*
deactivate
# Install the tarball into a fresh virtualenv in a uniquely named directory
# and run the unit tests against the installed package
cd $TEST_DIR
UUID=$(uuidgen)
mkdir $UUID
cd $UUID
virtualenv env --python=$PY
source ./env/bin/activate
pip install -r $REPO_DIR/requirements/test.txt
pip install $REPO_DIR/dist/*.tar.gz
cp -r $REPO_DIR/test .
pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
deactivate
cd $REPO_DIR
# Cleanup since we will build again
git clean -x -ff -d
test -z "$(git status --porcelain)"

# merge master
# Fail if origin and local differ
git checkout "$BRANCH"
git diff master "$REMOTE/master" --quiet
# Merging master into the branch must leave nothing to commit, otherwise the
# status check below fails the script
git merge master --no-commit
# Fail if not clean
test -z "$(git status --porcelain)"

# merge to master
git checkout master
git merge "$BRANCH" --squash --no-commit
git status
# Wait up to 1s and throw away whatever is read, presumably to drain input
# typed ahead of the prompt
read -t 1 -n 10000 discard || true
# -r keeps backslashes in the typed message literal, matching the other
# prompts in this script
read -r -p "Commit message (CTRL-C to abort): "
git commit -m "$REPLY"
# Fail if not clean
test -z "$(git status --porcelain)"

# Run tests locally and cleanup
./integration_test_with_setup.sh
./test.sh
git reset --hard HEAD
git clean -x -ff -d
test -z "$(git status --porcelain)"

# test tar ball
# Build and check the distribution inside the twine env
source $ENVS/$TWINE_ENV/bin/activate
./build_wheel.sh
twine check dist/*
deactivate
# Install the tarball into a fresh virtualenv in a uniquely named directory
# and run the unit tests against the installed package
cd $TEST_DIR
UUID=$(uuidgen)
mkdir $UUID
cd $UUID
virtualenv env --python=$PY
source ./env/bin/activate
pip install -r $REPO_DIR/requirements/test.txt
pip install $REPO_DIR/dist/*.tar.gz
cp -r $REPO_DIR/test .
pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
deactivate
cd $REPO_DIR

# push to test pypi
source $ENVS/$TWINE_ENV/bin/activate
twine upload --repository-url https://test.pypi.org/legacy/ dist/*
deactivate

# Pause until the user confirms the upload is visible on test PyPI
echo "ready to run?"
echo "pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/"
read -p "Enter when pypi has updated: " -r

# install and test
# Dependencies come from the pinned requirement files; only the package
# itself is installed from the test PyPI index
cd $TEST_DIR
UUID=$(uuidgen)
mkdir $UUID
cd $UUID
virtualenv env --python=$PY
source ./env/bin/activate
pip install -r $REPO_DIR/requirements/test.txt
pip install -r $REPO_DIR/requirements/ipynb.txt
pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/
cp $REPO_DIR/integration_test.sh .
cp -r $REPO_DIR/notebooks .
cp -r $REPO_DIR/example_opt_root .
./integration_test.sh
cp -r $REPO_DIR/test .
pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
deactivate
cd $REPO_DIR

# push to remote and check
git push $REMOTE master
git diff master $REMOTE/master --quiet

# Show sha256sum in case we want to check against PyPI test, use || for Mac OS version
sha256sum dist/* || shasum -a 256 dist/*

# See if tests pass remote, TODO use travis CLI
# The first read waits up to 1s and throws away whatever it gets, presumably
# to drain input typed ahead of the prompt
read -t 1 -n 10000 discard || true
read -p "Travis tests pass, and push to PyPI? This cannot be undone. [push/no]" -r
# Only the exact answer "push" continues; anything else aborts
if [[ ! $REPLY == push ]]
then
    exit 1
fi

# push to full pypi
source $ENVS/$TWINE_ENV/bin/activate
twine upload dist/*
deactivate

# Pause until the user confirms the upload is visible on PyPI
echo "ready to run?"
echo "pip install $PACKAGE==$VERSION"
read -p "Enter when pypi has updated: " -r

# install and test
# Same check as for test PyPI, but installing from the default index
cd $TEST_DIR
UUID=$(uuidgen)
mkdir $UUID
cd $UUID
virtualenv env --python=$PY
source ./env/bin/activate
pip install -r $REPO_DIR/requirements/test.txt
pip install -r $REPO_DIR/requirements/ipynb.txt
pip install $PACKAGE==$VERSION
cp $REPO_DIR/integration_test.sh .
cp -r $REPO_DIR/notebooks .
cp -r $REPO_DIR/example_opt_root .
./integration_test.sh
cp -r $REPO_DIR/test .
pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings
deactivate
cd $REPO_DIR

# clean and tag
git clean -x -ff -d
test -z "$(git status --porcelain)"
git tag -a v$VERSION -m "$PACKAGE version $VERSION"
git push $REMOTE v$VERSION

# remind user to archive/delete branch
echo "remember to delete branch $BRANCH, and update readthedocs.io"
echo "done"