[
  {
    "path": ".coveragerc",
    "content": "[report]\nexclude_lines =\n    pragma: no cover\n    @abstract\n    ValueError\n    NotImplementedError\n    assert\n    _error\n    def main()\n    pragma: io\n    pragma: main\n    pragma: validator\n"
  },
  {
    "path": ".gitignore",
    "content": ".*\n!.gitignore\n!.gitmodules\n!.flake8\n!.coveragerc\n!.pre-commit-config.yaml\n!.secrets.baseline\n!.travis.yml\n!.readthedocs.yml\n\n# For wheels\nbayesmark/version.py\ndist/\n\n# Java\n*.class\n\n# Intellij\n*.iml\n*.iws\n\n# Gradle\nbuild/\nclasses/\n\nlog/\ntmp/\n/out/\nins.xml\n*.log\n\n# Python\n*.py[co]\n*.egg*\n.cache\n.DS_Store\n\n# env\nenv/\n\n# Emacs\n*~\n.\\#*\n\\#*\\#\n\n# *ipynb\n.ipynb_checkpoints\n*.png\n*.aux\n\n# Hypothesis\ntests/src\nsrc/\n\n# Coverage\nhtmlcov/\n\n# for the test.sh pip compile check\nrequirements/*.chk\nrequirement_chk.in\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v1.2.3\n    hooks:\n    -   id: flake8\n        exclude: ^(docs/*)\n        args: [--max-line-length=120, --ignore=E203]\n    -   id: check-byte-order-marker\n    -   id: check-case-conflict\n    -   id: check-merge-conflict\n    -   id: end-of-file-fixer\n    -   id: forbid-new-submodules\n    -   id: mixed-line-ending\n        args: [--fix=lf]\n    -   id: trailing-whitespace\n    -   id: debug-statements\n    -   id: check-json\n    -   id: pretty-format-json\n        args: [--autofix, --indent=4]\n    -   id: check-yaml\n    -   id: sort-simple-yaml\n-   repo: https://github.com/ambv/black\n    rev: 19.3b0\n    hooks:\n    -   id: black\n        args: [-l 120, --target-version=py36]\n-   repo: https://github.com/asottile/seed-isort-config\n    rev: v1.2.0\n    hooks:\n    -   id: seed-isort-config\n        args: [--application-directories=test]\n-   repo: https://github.com/pre-commit/mirrors-isort\n    rev: v4.3.4\n    hooks:\n    -   id: isort\n        language_version: python3\n        args: [-w 120, -m 3, -tc, --project=bayesmark]\n- repo: https://github.com/jumanjihouse/pre-commit-hooks\n  rev: 1.11.0\n  hooks:\n    - id: require-ascii\n    - id: script-must-have-extension\n    - id: forbid-binary\n-   repo: https://github.com/Lucas-C/pre-commit-hooks\n    rev: v1.1.6\n    hooks:\n    -   id: forbid-crlf\n    -   id: forbid-tabs\n- repo: https://github.com/kynan/nbstripout\n  rev: fe155a55548c61e4eb53522e57921077acf82c00  # pragma: allowlist secret\n  hooks:\n    - id: nbstripout\n      exclude: ^notebooks/.*\\.out\\.ipynb$\n- repo: https://github.com/Yelp/detect-secrets\n  rev: v0.12.5\n  hooks:\n    - id: detect-secrets\n      args: ['--baseline', '.secrets.baseline']\n- repo: https://github.com/pre-commit/pygrep-hooks\n  rev: v1.4.1  # Use the ref you want to point at\n  hooks:\n    - id: python-no-eval\n    - id: python-check-blanket-noqa\n- repo: 
https://github.com/asottile/yesqa\n  rev: v0.0.11\n  hooks:\n    - id: yesqa\n- repo: https://github.com/myint/eradicate\n  rev: 522ed7ce2da82d33b3e2331bf50d4671c5a5af9a  # pragma: allowlist secret\n  hooks:\n    - id: eradicate\n      exclude: docs/conf.py\n"
  },
  {
    "path": ".readthedocs.yml",
    "content": "# .readthedocs.yml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Build documentation in the docs/ directory with Sphinx\nsphinx:\n  configuration: docs/conf.py\n\n# Build documentation with MkDocs\n#mkdocs:\n#  configuration: mkdocs.yml\n\n# Optionally build your docs in additional formats such as PDF and ePub\nformats: all\n\n# Optionally set the version of Python and requirements required to build your docs\npython:\n  version: 3.6\n  install:\n    - requirements: requirements/docs.txt\n"
  },
  {
    "path": ".secrets.baseline",
    "content": "{\n  \"exclude\": {\n    \"files\": null,\n    \"lines\": null\n  },\n  \"generated_at\": \"2019-09-18T01:04:54Z\",\n  \"plugins_used\": [\n    {\n      \"name\": \"AWSKeyDetector\"\n    },\n    {\n      \"name\": \"ArtifactoryDetector\"\n    },\n    {\n      \"base64_limit\": 4.5,\n      \"name\": \"Base64HighEntropyString\"\n    },\n    {\n      \"name\": \"BasicAuthDetector\"\n    },\n    {\n      \"hex_limit\": 3,\n      \"name\": \"HexHighEntropyString\"\n    },\n    {\n      \"name\": \"KeywordDetector\"\n    },\n    {\n      \"name\": \"PrivateKeyDetector\"\n    },\n    {\n      \"name\": \"SlackDetector\"\n    },\n    {\n      \"name\": \"StripeDetector\"\n    }\n  ],\n  \"results\": {},\n  \"version\": \"0.12.5\"\n}\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: python\npython:\n  - \"3.6\"\n\nbefore_script:\n    - \"curl -H 'Cache-Control: no-cache' https://raw.githubusercontent.com/fossas/fossa-cli/master/install.sh | sudo bash\"\n\nscript:\n    - ./integration_test_with_setup.sh\n    - ./test.sh\n    - cat requirements/*.txt >requirements.txt\n    - '[ ! -z \"$FOSSA_API_KEY\" ] && (fossa init && fossa analyze) || true'\n"
  },
  {
    "path": "LICENSE",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include requirements/base.in\ninclude requirements/optimizers.in\ninclude requirements/ipynb.in\ninclude LICENSE\ninclude README.rst\n"
  },
  {
    "path": "README.rst",
    "content": "Installation\n============\n\nThis project provides a benchmark framework to easily compare Bayesian optimization methods on real machine learning tasks.\n\nThis project is experimental and the APIs are not considered stable.\n\nThis Bayesian optimization (BO) benchmark framework requires a few easy steps for setup. It can be run either on a local machine (in serial) or prepare a *commands file* to run on a cluster as parallel experiments (dry run mode).\n\nOnly ``Python>=3.6`` is officially supported, but older versions of Python likely work as well.\n\nThe core package itself can be installed with:\n\n.. code-block:: bash\n\n   pip install bayesmark\n\nHowever, to also require installation of all the \"built in\" optimizers for evaluation, run:\n\n.. code-block:: bash\n\n   pip install bayesmark[optimizers]\n\nIt is also possible to use the same pinned dependencies we used in testing by `installing from the repo <#install-in-editable-mode>`_.\n\nBuilding an environment to run the included notebooks can be done with:\n\n.. code-block:: bash\n\n   pip install bayesmark[notebooks]\n\nOr, ``bayesmark[optimizers,notebooks]`` can be used.\n\nA quick example of running the benchmark is `here <#example>`_. The instructions are used to generate results as below:\n\n.. image:: https://user-images.githubusercontent.com/28273671/66338456-02516b80-e8f6-11e9-8156-2e84e04cf6fe.png\n    :width: 95 %\n\nNon-pip dependencies\n--------------------\n\nTo be able to install ``opentuner`` some system level (non-pip) dependencies must be installed. This can be done with:\n\n.. code-block:: bash\n\n   sudo apt-get install libsqlite3-0\n   sudo apt-get install libsqlite3-dev\n\nOn Ubuntu, this results in:\n\n.. 
code-block:: console\n\n   > dpkg -l | grep libsqlite\n   ii  libsqlite3-0:amd64    3.11.0-1ubuntu1  amd64  SQLite 3 shared library\n   ii  libsqlite3-dev:amd64  3.11.0-1ubuntu1  amd64  SQLite 3 development files\n\nThe environment should now all be setup to run the BO benchmark.\n\nRunning\n=======\n\nNow we can run each step of the experiments. First, we run all combinations and then run some quick commands to analyze the output.\n\nLaunch the experiments\n----------------------\n\nThe experiments are run using the experiment launcher, which has the following interface:\n\n.. code-block::\n\n   usage: bayesmark-launch [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] [-u UUID]\n                     [-dr DATA_ROOT] [-b DB] [-o OPTIMIZER [OPTIMIZER ...]]\n                     [-d DATA [DATA ...]]\n                     [-c [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...]]\n                     [-m [{acc,mae,mse,nll} ...]] [-n N_CALLS]\n                     [-p N_SUGGEST] [-r N_REPEAT] [-nj N_JOBS] [-ofile JOBS_FILE]\n\nThe arguments are:\n\n.. 
code-block::\n\n     -h, --help            show this help message and exit\n     -dir DB_ROOT, -db-root DB_ROOT\n                           root directory for all benchmark experiments output\n     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT\n                           Directory with optimization wrappers\n     -v, --verbose         print the study logs to console\n     -u UUID, --uuid UUID  length 32 hex UUID for this experiment\n     -dr DATA_ROOT, --data-root DATA_ROOT\n                           root directory for all custom csv files\n     -b DB, --db DB        database ID of this benchmark experiment\n     -o OPTIMIZER [OPTIMIZER ...], --opt OPTIMIZER [OPTIMIZER ...]\n                           optimizers to use\n     -d DATA [DATA ...], --data DATA [DATA ...]\n                           data sets to use\n     -c, --classifier [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...]\n                           classifiers to use\n     -m, --metric [{acc,mae,mse,nll} ...]\n                           scoring metric to use\n     -n N_CALLS, --calls N_CALLS\n                           number of function evaluations\n     -p N_SUGGEST, --suggestions N_SUGGEST\n                           number of suggestions to provide in parallel\n     -r N_REPEAT, --repeat N_REPEAT\n                           number of repetitions of each study\n     -nj N_JOBS, --num-jobs N_JOBS\n                           number of jobs to put in the dry run file, the default\n                           0 value disables dry run (real run)\n     -ofile JOBS_FILE, --jobs-file JOBS_FILE\n                           a jobs file with all commands to be run\n\nThe output files will be placed in ``[DB_ROOT]/[DBID]``. If ``DBID`` is not specified, it will be a randomly created subdirectory with a new name to avoid overwriting previous experiments. The path to ``DBID`` is shown at the beginning of ``stdout`` when running ``bayesmark-launch``. 
In general, let the launcher create and set up ``DBID`` unless you are appending to a previous experiment, in which case, specify the existing ``DBID``.\n\nThe launcher's sequence of commands can be accessed programmatically via :func:`.experiment_launcher.gen_commands`. The individual experiments can be launched programmatically via :func:`.experiment.run_sklearn_study`.\n\nSelecting the experiments\n^^^^^^^^^^^^^^^^^^^^^^^^^\n\nA list of optimizers, classifiers, data sets, and metrics can be specified using the ``-o``/``-c``/``-d``/``-m`` arguments, respectively. If not specified, the program launches all possible options.\n\nSelecting the optimizer\n^^^^^^^^^^^^^^^^^^^^^^^\n\nA few different open source optimizers have been included as an example and are considered the \"built-in\" optimizers. The original repos are shown in the `Links <#links>`_.\n\nThe optimizer argument ``-o`` allows a list containing the \"built-in\" optimizers:\n\n.. code-block::\n\n   \"HyperOpt\", \"Nevergrad-OnePlusOne\", \"OpenTuner-BanditA\", \"OpenTuner-GA\", \"OpenTuner-GA-DE\", \"PySOT\", \"RandomSearch\", \"Scikit-GBRT-Hedge\", \"Scikit-GP-Hedge\", \"Scikit-GP-LCB\"\n\nor, one can specify a user-defined optimizer. The class containing an optimizer conforming to the API must be found in the folder specified by ``--opt-root``. Additionally, a configuration defining each optimizer must be defined in ``[OPT_ROOT]/config.json``. The ``--opt-root`` and ``config.json`` may be omitted if only built-in optimizers are used.\n\nAdditional details for providing a new optimizer are found in `adding a new optimizer <#adding-a-new-optimizer>`_.\n\nSelecting the data set\n^^^^^^^^^^^^^^^^^^^^^^\n\nBy default, this benchmark uses the `sklearn example data sets <https://scikit-learn.org/stable/datasets/index.html#toy-datasets>`_ as the \"built-in\" data sets for use in ML model tuning problems.\n\nThe data argument ``-d`` allows a list containing the \"built-in\" data sets:\n\n.. 
code-block::\n\n   \"breast\", \"digits\", \"iris\", \"wine\", \"boston\", \"diabetes\"\n\nor, it can refer to a custom ``csv`` file, given as the name of a file in the folder specified by ``--data-root``. It also follows the convention that regression data sets start with ``reg-`` and classification data sets start with ``clf-``. For example, the classification data set in ``[DATA_ROOT]/clf-foo.csv`` is specified with ``-d clf-foo``.\n\nThe ``csv`` file can be anything readable by pandas, but we assume the final column is the target and all other columns are features. The target column should be integer for classification data and float for regression. The features should be float (or ``str`` for categorical variable columns). See ``bayesmark.data.load_data`` for more information.\n\nDry run for cluster jobs\n^^^^^^^^^^^^^^^^^^^^^^^^\n\nIt is also possible to do a \"dry run\" of the launcher by specifying a value for ``--num-jobs`` greater than zero. For example, if ``--num-jobs 50`` is provided, a text file listing 50 commands to run is produced, with one command (job) per line. This is useful when preparing a list of commands to run later on a cluster.\n\nA dry run will generate a command file (e.g., ``jobs.txt``) like the following (with a meta-data header). Each line corresponds to a command that can be used as a job on a different worker:\n\n.. 
code-block::\n\n   # running: {'--uuid': None, '-db-root': '/foo', '--opt-root': '/example_opt_root', '--data-root': None, '--db': 'bo_example_folder', '--opt': ['RandomSearch', 'PySOT'], '--data': None, '--classifier': ['SVM', 'DT'], '--metric': None, '--calls': 15, '--suggestions': 1, '--repeat': 3, '--num-jobs': 50, '--jobs-file': '/jobs.txt', '--verbose': False, 'dry_run': True, 'rev': '9a14ef2', 'opt_rev': None}\n   # cmd: python bayesmark-launch -n 15 -r 3 -dir foo -o RandomSearch PySOT -c SVM DT -nj 50 -b bo_example_folder\n   job_e2b63a9_00 bayesmark-exp -c SVM -d diabetes -o PySOT -u 079a155f03095d2ba414a5d2cedde08c -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d boston -o RandomSearch -u 400e4c0be8295ad59db22d9b5f31d153 -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d digits -o RandomSearch -u fe73a2aa960a5e3f8d78bfc4bcf51428 -m acc -n 15 -p 1 -dir foo -b bo_example_folder\n   job_e2b63a9_01 bayesmark-exp -c DT -d diabetes -o PySOT -u db1d9297948554e096006c172a0486fb -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d boston -o RandomSearch -u 7148f690ed6a543890639cc59db8320b -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d breast -o PySOT -u 72c104ba1b6d5bb8a546b0064a7c52b1 -m nll -n 15 -p 1 -dir foo -b bo_example_folder\n   job_e2b63a9_02 bayesmark-exp -c SVM -d iris -o PySOT -u cc63b2c1e4315a9aac0f5f7b496bfb0f -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d breast -o RandomSearch -u aec62e1c8b5552e6b12836f0c59c1681 -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d digits -o RandomSearch -u 4d0a175d56105b6bb3055c3b62937b2d -m acc -n 15 -p 1 -dir foo -b bo_example_folder\n   ...\n\nThis package does not have built-in support for deploying these jobs on a cluster or cloud environment (e.g., AWS).\n\nThe UUID argument\n^^^^^^^^^^^^^^^^^\n\nThe ``UUID`` is a 32-char hex string used as a master random seed 
which we use to draw random seeds for the experiments. If ``UUID`` is not specified, a version 4 UUID is generated. The used UUID is displayed at the beginning of ``stdout``. In general, the ``UUID`` should not be specified/re-used except for debugging because it violates the assumption that the experiment UUIDs are unique.\n\nAggregate results\n-----------------\n\nNext, to aggregate all the experiment files into combined (json) files, we need to run the aggregation command:\n\n.. code-block::\n\n   usage: bayesmark-agg [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB [-rv]\n\nThe arguments are:\n\n.. code-block::\n\n     -h, --help            show this help message and exit\n     -dir DB_ROOT, -db-root DB_ROOT\n                           root directory for all benchmark experiments output\n     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT\n                           Directory with optimization wrappers\n     -v, --verbose         print the study logs to console\n     -b DB, --db DB        database ID of this benchmark experiment\n     -rv, --ravel          ravel all studies to store batch suggestions as if\n                           they were serial\n\nThe ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``.\n\nThe result aggregation can be done programmatically via :func:`.experiment_aggregate.concat_experiments`.\n\nAnalyze and summarize results\n-----------------------------\n\nFinally, to run a statistical analysis presenting a summary of the experiments, we run:\n\n.. code-block::\n\n   usage: bayesmark-anal [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB\n\nThe arguments are:\n\n.. 
code-block::\n\n     -h, --help            show this help message and exit\n     -dir DB_ROOT, -db-root DB_ROOT\n                           root directory for all benchmark experiments output\n     -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT\n                           Directory with optimization wrappers\n     -v, --verbose         print the study logs to console\n     -b DB, --db DB        database ID of this benchmark experiment\n\nThe ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``.\n\nThe ``bayesmark-anal`` command looks for a ``baseline.json`` file in ``[DB_ROOT]/[DBID]/derived``, which states the best possible and random search performance. If no such file is present, ``bayesmark-anal`` automatically calls ``bayesmark-baseline`` to build it. The baselines are inferred from the random search performance in the logs. The baseline values are considered fixed (not random) quantities when ``bayesmark-anal`` builds confidence intervals. Therefore, we allow the user to leave them fixed and do not rebuild them when ``bayesmark-anal`` is called if a baselines file is already present.\n\nThe result analysis can be done programmatically via :func:`.experiment_analysis.compute_aggregates`, and the baseline computation via :func:`.experiment_baseline.compute_baseline`.\n\nSee :ref:`how-scoring-works` for more information on how the scores are computed and aggregated.\n\nExample\n-------\n\nAfter finishing the setup (environment) a small-scale serial can be run as follows:\n\n.. 
code-block:: console\n\n   > # setup\n   > DB_ROOT=./notebooks  # path/to/where/you/put/results\n   > DBID=bo_example_folder\n   > mkdir $DB_ROOT\n   > # experiments\n   > bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT -c SVM DT -v\n   Supply --uuid 3adc3182635e44ea96969d267591f034 to reproduce this run.\n   Supply --dbid bo_example_folder to append to this experiment or reproduce jobs file.\n   User must ensure equal reps of each optimizer for unbiased results\n   -c DT -d boston -o PySOT -u a1b287b450385ad09b2abd7582f404a2 -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder\n   -c DT -d boston -o PySOT -u 63746599ae3f5111a96942d930ba1898 -m mse -n 15 -p 1 -dir /notebooks -b bo_example_folder\n   -c DT -d boston -o RandomSearch -u 8ba16c880ef45b27ba0909199ab7aa8a -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder\n   ...\n   0 failures of benchmark script after 144 studies.\n   done\n   > # aggregate\n   > bayesmark-agg -dir $DB_ROOT -b $DBID\n   > # analyze\n   > bayesmark-anal -dir $DB_ROOT -b $DBID -v\n   ...\n   median score @ 15:\n   optimizer\n   PySOT_0.2.3_9b766b6           0.330404\n   RandomSearch_0.0.1_9b766b6    0.961829\n   mean score @ 15:\n   optimizer\n   PySOT_0.2.3_9b766b6           0.124262\n   RandomSearch_0.0.1_9b766b6    0.256422\n   normed mean score @ 15:\n   optimizer\n   PySOT_0.2.3_9b766b6           0.475775\n   RandomSearch_0.0.1_9b766b6    0.981787\n   done\n\nThe aggregate result files (i.e., ``summary.json``) will now be available in ``$DB_ROOT/$DBID/derived``. However, this will be high variance since it was from only 3 trials and only to 15 function evaluations.\n\nPlotting and notebooks\n----------------------\n\nPlotting the quantitative results found in ``$DB_ROOT/$DBID/derived`` can be done using the notebooks found in the ``notebooks/`` folder of the git repository. The notebook ``plot_mean_score.ipynb`` generates plots for aggregate scores averaging over all problems. 
The notebook ``plot_test_case.ipynb`` generates plots for each test problem.\n\nTo use the notebooks, first copy over the ``notebooks/`` folder from the git repository.\n\nTo set up the kernel for running the notebooks use:\n\n.. code-block:: bash\n\n   virtualenv bobm_ipynb --python=python3.6\n   source ./bobm_ipynb/bin/activate\n   pip install bayesmark[notebooks]\n   python -m ipykernel install --name=bobm_ipynb --user\n\nNow, the notebooks for plotting can be run with the command ``jupyter notebook`` and selecting the kernel ``bobm_ipynb``.\n\nIt is also possible to convert the notebooks to an HTML report at the command line using ``nbconvert``. For example, use the command:\n\n.. code-block:: bash\n\n   jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb\n\nThe output file will be in ``./notebooks/plot_mean_score.html``. Here is an example `export <https://github.com/uber/bayesmark/files/3699241/plot_mean_score.pdf>`_. See the ``nbconvert`` `documentation page <https://nbconvert.readthedocs.io/en/latest/usage.html#supported-output-formats>`_ for more output formats. By default, the notebooks look in ``./notebooks/bo_example_folder/`` for the ``summary.json`` from ``bayesmark-anal``.\n\nTo run ``plot_test_case.ipynb`` use the command:\n\n.. code-block:: bash\n\n   jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=600\n\nThe ``--ExecutePreprocessor.timeout=600`` timeout increase is needed due to the large number of plots being generated. The output will be in ``./notebooks/plot_test_case.html``.\n\nAdding a new optimizer\n======================\n\nAll optimizers in this benchmark are required to follow the interface specified by the ``AbstractOptimizer`` class in ``bayesmark.abstract_optimizer``. In general, this requires creating a wrapper class around the new optimizer. The wrapper classes must all be placed in a folder referred to by the ``--opt-root`` argument. 
This folder must also contain the ``config.json`` file.\n\nThe interface is simple, one must merely implement the ``suggest`` and ``observe`` functions. The ``suggest`` function generates new guesses for evaluating the function. Once evaluated, the function evaluations are passed to the ``observe`` function. The objective function is *not* evaluated by the optimizer class. The objective function is evaluated on the outside and results are passed to ``observe``. This is the correct setup for Bayesian optimization because:\n\n* We can observe/try inputs that were never suggested\n* We can ignore suggestions\n* The objective function may not be something as simple as a Python function\n\nSo passing the function as an argument as is done in ``scipy.optimize`` is artificially restrictive.\n\nThe implementation of the wrapper will look like the following:\n\n.. code-block:: python\n\n   from bayesmark.abstract_optimizer import AbstractOptimizer\n   from bayesmark.experiment import experiment_main\n\n\n   class NewOptimizerName(AbstractOptimizer):\n       # Used for determining the version number of package used\n       primary_import = \"name of import used e.g, opentuner\"\n\n       def __init__(self, api_config, optional_arg_foo=None, optional_arg_bar=None):\n           \"\"\"Build wrapper class to use optimizer in benchmark.\n\n           Parameters\n           ----------\n           api_config : dict-like of dict-like\n               Configuration of the optimization variables. 
See API description.\n           \"\"\"\n           AbstractOptimizer.__init__(self, api_config)\n           # Do whatever other setup is needed\n           # ...\n\n       def suggest(self, n_suggestions=1):\n           \"\"\"Get suggestion from the optimizer.\n\n           Parameters\n           ----------\n           n_suggestions : int\n               Desired number of parallel suggestions in the output\n\n           Returns\n           -------\n           next_guess : list of dict\n               List of `n_suggestions` suggestions to evaluate the objective\n               function. Each suggestion is a dictionary where each key\n               corresponds to a parameter being optimized.\n           \"\"\"\n           # Do whatever is needed to get the parallel guesses\n           # ...\n           return x_guess\n\n       def observe(self, X, y):\n           \"\"\"Feed an observation back.\n\n           Parameters\n           ----------\n           X : list of dict-like\n               Places where the objective function has already been evaluated.\n               Each suggestion is a dictionary where each key corresponds to a\n               parameter being optimized.\n           y : array-like, shape (n,)\n               Corresponding values where objective has been evaluated\n           \"\"\"\n           # Update the model with new objective function observations\n           # ...\n           # No return statement needed\n\n\n   if __name__ == \"__main__\":\n       # This is the entry point for experiments, so pass the class to experiment_main to use this optimizer.\n       # This statement must be included in the wrapper class file:\n       experiment_main(NewOptimizerName)\n\nDepending on the API of the optimizer being wrapped, building this wrapper class may only require a few lines of code, or be a total pain.\n\nThe config file\n---------------\n\nNote: A config file is now optional. 
If no ``config.json`` is provided, the experiment launcher will look for all folders with an ``optimizer.py`` in the ``--opt-root`` directory.\n\nEach optimizer wrapper can have multiple configurations, each of which is referred to as a different optimizer in the benchmark. For example, the JSON config file will have entries as follows:\n\n.. code-block:: json\n\n   {\n       \"OpenTuner-BanditA-New\": [\n           \"opentuner_optimizer.py\",\n           {\"techniques\": [\"AUCBanditMetaTechniqueA\"]}\n       ],\n       \"OpenTuner-GA-DE-New\": [\n           \"opentuner_optimizer.py\",\n           {\"techniques\": [\"PSO_GA_DE\"]}\n       ],\n       \"OpenTuner-GA-New\": [\n           \"opentuner_optimizer.py\",\n           {\"techniques\": [\"PSO_GA_Bandit\"]}\n       ]\n   }\n\nBasically, the entries are ``\"name_of_strategy\": [\"file_with_class\", {kwargs_for_the_constructor}]``. Here, ``OpenTuner-BanditA``, ``OpenTuner-GA-DE``, and ``OpenTuner-GA`` are all treated as different optimizers by the benchmark even though they all use the same class from ``opentuner_optimizer.py``.\n\nThis ``config.json`` must be in the same folder as the optimizer classes (e.g., ``opentuner_optimizer.py``).\n\nRunning with a new optimizer\n----------------------------\n\nTo run the benchmarks using a new optimizer, simply provide its name (from ``config.json``) in the ``-o`` list. The ``--opt-root`` argument must be specified in this case. For example, the launch command from the `example <#example>`_ becomes:\n\n.. code-block:: bash\n\n   bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New -c SVM DT --opt-root ./example_opt_root -v\n\nHere, we are using the example ``PySOT-New`` wrapper from the ``example_opt_root`` folder in the git repo. 
It is equivalent to the builtin ``PySOT``, but gives an example of how to provide a new custom optimizer.\n\nContributing\n============\n\nThe following instructions have been tested with Python 3.6.8 on Ubuntu (16.04.5 LTS).\n\nInstall in editable mode\n------------------------\n\nFirst, define the variables for the paths we will use:\n\n.. code-block:: bash\n\n   GIT=/path/to/where/you/put/repos\n   ENVS=/path/to/where/you/put/virtualenvs\n\nThen clone the repo in your git directory ``$GIT``:\n\n.. code-block:: bash\n\n   cd $GIT\n   git clone https://github.com/uber/bayesmark.git\n\nInside your virtual environments folder ``$ENVS``, make the environment:\n\n.. code-block:: bash\n\n   cd $ENVS\n   virtualenv bayesmark --python=python3.6\n   source $ENVS/bayesmark/bin/activate\n\nNow we can install the pip dependencies. Move back into your git directory and run\n\n.. code-block:: bash\n\n   cd $GIT/bayesmark\n   pip install -r requirements/base.txt\n   pip install -r requirements/optimizers.txt\n   pip install -e .  # Install the benchmark itself\n\nYou may want to run ``pip install -U pip`` first if you have an old version of ``pip``. The file ``optimizers.txt`` contains the dependencies for all the optimizers used in the benchmark. The analysis and aggregation programs can be run using only the requirements in ``base.txt``.\n\nContributor tools\n-----------------\n\nFirst, we need to setup some needed tools:\n\n.. code-block:: bash\n\n   cd $ENVS\n   virtualenv bayesmark_tools --python=python3.6\n   source $ENVS/bayesmark_tools/bin/activate\n   pip install -r $GIT/bayesmark/requirements/tools.txt\n\nTo install the pre-commit hooks for contributing run (in the ``bayesmark_tools`` environment):\n\n.. code-block:: bash\n\n   cd $GIT/bayesmark\n   pre-commit install\n\nTo rebuild the requirements, we can run:\n\n.. 
code-block:: bash\n\n   cd $GIT/bayesmark\n   # Get py files from notebooks to analyze\n   jupyter nbconvert --to script notebooks/*.ipynb\n   # Generate the .in files (but pins to latest, which we might not want)\n   pipreqs bayesmark/ --ignore bayesmark/builtin_opt/ --savepath requirements/base.in\n   pipreqs test/ --savepath requirements/test.in\n   pipreqs bayesmark/builtin_opt/ --savepath requirements/optimizers.in\n   pipreqs notebooks/ --savepath requirements/ipynb.in\n   pipreqs docs/ --savepath requirements/docs.in\n   # Regenerate the .txt files from .in files\n   pip-compile-multi --no-upgrade\n\nGenerating the documentation\n----------------------------\n\nFirst setup the environment for building with ``Sphinx``:\n\n.. code-block:: bash\n\n   cd $ENVS\n   virtualenv bayesmark_docs --python=python3.6\n   source $ENVS/bayesmark_docs/bin/activate\n   pip install -r $GIT/bayesmark/requirements/docs.txt\n\nThen we can do the build:\n\n.. code-block:: bash\n\n   cd $GIT/bayesmark/docs\n   make all\n   open _build/html/index.html\n\nDocumentation will be available in all formats in ``Makefile``. Use ``make html`` to only generate the HTML documentation.\n\nRunning the tests\n-----------------\n\nThe tests for this package can be run with:\n\n.. code-block:: bash\n\n   cd $GIT/bayesmark\n   ./test.sh\n\nThe script creates a conda environment using the requirements found in ``requirements/test.txt``.\n\nThe ``test.sh`` script *must* be run from a *clean* git repo.\n\nOr if we only want to run the unit tests and not check the adequacy of the requirements files, one can use\n\n.. 
code-block:: bash\n\n   # Setup environment\n   cd $ENVS\n   virtualenv bayesmark_test --python=python3.6\n   source $ENVS/bayesmark_test/bin/activate\n   pip install -r $GIT/bayesmark/requirements/test.txt\n   pip install -e $GIT/bayesmark\n   # Now run tests\n   cd $GIT/bayesmark/\n   pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html\n\nA code coverage report will also be produced in ``$GIT/bayesmark/htmlcov/index.html``.\n\nDeployment\n----------\n\nThe wheel (tar ball) for deployment as a pip installable package can be built using the script:\n\n.. code-block:: bash\n\n   cd $GIT/bayesmark/\n   ./build_wheel.sh\n\nLinks\n=====\n\nThe `source <https://github.com/uber/bayesmark>`_ is hosted on GitHub.\n\nThe `documentation <https://bayesmark.readthedocs.io/en/latest/>`_ is hosted at Read the Docs.\n\nInstallable from `PyPI <https://pypi.org/project/bayesmark/>`_.\n\nThe builtin optimizers are wrappers on the following projects:\n\n* `HyperOpt <https://github.com/hyperopt/hyperopt>`_\n* `Nevergrad <https://github.com/facebookresearch/nevergrad>`_\n* `OpenTuner <https://github.com/jansel/opentuner>`_\n* `PySOT <https://github.com/dme65/pySOT>`_\n* `Scikit-optimize <https://github.com/scikit-optimize/scikit-optimize>`_\n\nLicense\n=======\n\nThis project is licensed under the Apache 2 License - see the LICENSE file for details.\n"
  },
  {
    "path": "bayesmark/__init__.py",
    "content": "__version__ = \"0.0.8\"\n__author__ = \"Ryan Turner\"\n__license__ = \"Apache v2\"\n"
  },
  {
    "path": "bayesmark/abstract_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.\n\"\"\"\nfrom abc import ABC, abstractmethod\n\nfrom importlib_metadata import version\n\n\nclass AbstractOptimizer(ABC):\n    \"\"\"Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.\n    \"\"\"\n\n    # Every implementation package needs to specify this static variable, e.g., \"primary_import=opentuner\"\n    primary_import = None\n\n    def __init__(self, api_config, **kwargs):\n        \"\"\"Build wrapper class to use an optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        \"\"\"\n        self.api_config = api_config\n\n    @classmethod\n    def get_version(cls):\n        \"\"\"Get the version for this optimizer.\n\n        Returns\n        -------\n        version_str : str\n            Version number of the optimizer. 
Usually, this is equivalent to ``package.__version__``.\n        \"\"\"\n        assert (cls.primary_import is None) or isinstance(cls.primary_import, str)\n        # Should use x.x.x as version if sub-class did not specify its primary import\n        version_str = \"x.x.x\" if cls.primary_import is None else version(cls.primary_import)\n        return version_str\n\n    @abstractmethod\n    def suggest(self, n_suggestions):\n        \"\"\"Get a suggestion from the optimizer.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def observe(self, X, y):\n        \"\"\"Send an observation of a suggestion back to the optimizer.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        pass\n"
  },
  {
    "path": "bayesmark/builtin_opt/__init__.py",
    "content": ""
  },
  {
    "path": "bayesmark/builtin_opt/config.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom bayesmark.constants import RANDOM_SEARCH\n\nCONFIG = {\n    \"HyperOpt\": [\"hyperopt_optimizer.py\", {}],\n    \"Nevergrad-OnePlusOne\": [\"nevergrad_optimizer.py\", {\"budget\": 300, \"tool\": \"OnePlusOne\"}],\n    \"OpenTuner-BanditA\": [\"opentuner_optimizer.py\", {\"techniques\": [\"AUCBanditMetaTechniqueA\"]}],\n    \"OpenTuner-GA\": [\"opentuner_optimizer.py\", {\"techniques\": [\"PSO_GA_Bandit\"]}],\n    \"OpenTuner-GA-DE\": [\"opentuner_optimizer.py\", {\"techniques\": [\"PSO_GA_DE\"]}],\n    \"PySOT\": [\"pysot_optimizer.py\", {}],\n    \"RandomSearch\": [\"random_optimizer.py\", {}],\n    \"Scikit-GBRT-Hedge\": [\n        \"scikit_optimizer.py\",\n        {\"acq_func\": \"gp_hedge\", \"base_estimator\": \"GBRT\", \"n_initial_points\": 5},\n    ],\n    \"Scikit-GP-Hedge\": [\"scikit_optimizer.py\", {\"acq_func\": \"gp_hedge\", \"base_estimator\": \"GP\", \"n_initial_points\": 5}],\n    \"Scikit-GP-LCB\": [\"scikit_optimizer.py\", {\"acq_func\": \"LCB\", \"base_estimator\": \"GP\", \"n_initial_points\": 5}],\n}\n\nassert RANDOM_SEARCH in CONFIG, \"%s required in settings file.\" % RANDOM_SEARCH\n"
  },
  {
    "path": "bayesmark/builtin_opt/hyperopt_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hyperopt import hp, tpe\nfrom hyperopt.base import JOB_STATE_DONE, JOB_STATE_NEW, STATUS_OK, Domain, Trials\nfrom scipy.interpolate import interp1d\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.np_util import random as np_random\nfrom bayesmark.np_util import random_seed\n\n# Sklearn prefers str to unicode:\nDTYPE_MAP = {\"real\": float, \"int\": int, \"bool\": bool, \"cat\": str, \"ordinal\": str}\n\n\ndef dummy_f(x):\n    assert False, \"This is a placeholder, it should never be called.\"\n\n\ndef only(x):\n    y, = x\n    return y\n\n\nclass HyperoptOptimizer(AbstractOptimizer):\n    primary_import = \"hyperopt\"\n\n    def __init__(self, api_config, random=np_random):\n        \"\"\"Build wrapper class to use hyperopt optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. 
See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n\n        space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config)\n        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)\n        self.trials = Trials()\n\n        # Some book keeping like opentuner wrapper\n        self.trial_id_lookup = {}\n\n        # Store just for data validation\n        self.param_set_chk = frozenset(api_config.keys())\n\n    @staticmethod\n    def hashable_dict(d):\n        \"\"\"A custom function for hashing dictionaries.\n\n        Parameters\n        ----------\n        d : dict or dict-like\n            The dictionary to be converted to immutable/hashable type.\n\n        Returns\n        -------\n        hashable_object : frozenset of tuple pairs\n            Bijective equivalent to dict that can be hashed.\n        \"\"\"\n        hashable_object = frozenset(d.items())\n        return hashable_object\n\n    @staticmethod\n    def get_hyperopt_dimensions(api_config):\n        \"\"\"Help routine to setup hyperopt search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistnent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        space = {}\n        round_to_values = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            # Some setup for case that whitelist of values is provided:\n            values_only_type = param_type in (\"cat\", \"ordinal\")\n            if (param_values is not None) and (not 
values_only_type):\n                assert param_range is None\n                param_values = np.unique(param_values)\n                param_range = (param_values[0], param_values[-1])\n                round_to_values[param_name] = interp1d(\n                    param_values, param_values, kind=\"nearest\", fill_value=\"extrapolate\"\n                )\n\n            if param_type == \"int\":\n                low, high = param_range\n                if param_space in (\"log\", \"logit\"):\n                    space[param_name] = hp.qloguniform(param_name, np.log(low), np.log(high), 1)\n                else:\n                    space[param_name] = hp.quniform(param_name, low, high, 1)\n            elif param_type == \"bool\":\n                assert param_range is None\n                assert param_values is None\n                space[param_name] = hp.choice(param_name, (False, True))\n            elif param_type in (\"cat\", \"ordinal\"):\n                assert param_range is None\n                space[param_name] = hp.choice(param_name, param_values)\n            elif param_type == \"real\":\n                low, high = param_range\n                if param_space in (\"log\", \"logit\"):\n                    space[param_name] = hp.loguniform(param_name, np.log(low), np.log(high))\n                else:\n                    space[param_name] = hp.uniform(param_name, low, high)\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n\n        return space, round_to_values\n\n    def get_trial(self, trial_id):\n        for trial in self.trials._dynamic_trials:\n            if trial[\"tid\"] == trial_id:\n                assert isinstance(trial, dict)\n                # Make sure right kind of dict\n                assert \"state\" in trial and \"result\" in trial\n                assert trial[\"state\"] == JOB_STATE_NEW\n                return trial\n        assert False, \"No matching trial ID\"\n\n    def 
cleanup_guess(self, x_guess):\n        assert isinstance(x_guess, dict)\n        # Also, check the keys are only the vars we are searching over:\n        assert frozenset(x_guess.keys()) == self.param_set_chk\n\n        # Do the rounding\n        # Make a copy to be safe, and also unpack singletons\n        # We may also need to consider clip_chk at some point like opentuner\n        x_guess = {k: only(x_guess[k]) for k in x_guess}\n        for param_name, round_f in self.round_to_values.items():\n            x_guess[param_name] = round_f(x_guess[param_name])\n        # Also ensure this is correct dtype so sklearn is happy\n        x_guess = {k: DTYPE_MAP[self.api_config[k][\"type\"]](x_guess[k]) for k in x_guess}\n        return x_guess\n\n    def _suggest(self):\n        \"\"\"Helper function to `suggest` that does the work of calling\n        `hyperopt` via its dumb API.\n        \"\"\"\n        new_ids = self.trials.new_trial_ids(1)\n        assert len(new_ids) == 1\n        self.trials.refresh()\n\n        seed = random_seed(self.random)\n        new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed)\n        assert len(new_trials) == 1\n\n        self.trials.insert_trial_docs(new_trials)\n        self.trials.refresh()\n\n        new_trial, = new_trials  # extract singleton\n        return new_trial\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Make `n_suggestions` suggestions for what to evaluate next.\n\n        This requires the user observe all previous suggestions before calling\n        again.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            The number of suggestions to return.\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        assert n_suggestions >= 1, \"invalid value for n_suggestions\"\n\n        # Get the new trials, it seems hyperopt either uses random search or\n        # guesses one at a time anyway, so we might as welll call serially.\n        new_trials = [self._suggest() for _ in range(n_suggestions)]\n\n        X = []\n        for trial in new_trials:\n            x_guess = self.cleanup_guess(trial[\"misc\"][\"vals\"])\n            X.append(x_guess)\n\n            # Build lookup to get original trial object\n            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)\n            assert x_guess_ not in self.trial_id_lookup, \"the suggestions should not already be in the trial dict\"\n            self.trial_id_lookup[x_guess_] = trial[\"tid\"]\n\n        assert len(X) == n_suggestions\n        return X\n\n    def observe(self, X, y):\n        \"\"\"Feed the observations back to hyperopt.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated.\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_guess, y_ in zip(X, y):\n            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)\n            assert x_guess_ in self.trial_id_lookup, \"Appears to be guess that did not originate from suggest\"\n\n            trial_id = self.trial_id_lookup.pop(x_guess_)\n            trial = self.get_trial(trial_id)\n            assert self.cleanup_guess(trial[\"misc\"][\"vals\"]) == x_guess, \"trial ID not consistent with x values stored\"\n\n            # Cast to float to ensure native type\n            result = {\"loss\": float(y_), 
\"status\": STATUS_OK}\n            trial[\"state\"] = JOB_STATE_DONE\n            trial[\"result\"] = result\n        # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end\n        # of loop of a bunch of evals, so we will do the same thing here.\n        self.trials.refresh()\n\n\nopt_wrapper = HyperoptOptimizer\n"
  },
  {
    "path": "bayesmark/builtin_opt/nevergrad_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport nevergrad.optimization as optimization\nimport numpy as np\nfrom nevergrad import instrumentation as inst\nfrom scipy.stats import norm\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.np_util import linear_rescale\nfrom bayesmark.space import Real\n\n\nclass NevergradOptimizer(AbstractOptimizer):\n    primary_import = \"nevergrad\"\n\n    def __init__(self, api_config, tool, budget):\n        \"\"\"Build wrapper class to use nevergrad optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. 
See API description.\n        budget : int\n            Expected number of max function evals\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)\n\n        dimension = self.instrum.dimension\n        opt_class = optimization.registry[tool]\n        self.optim = opt_class(dimension=dimension, budget=budget)\n\n    @staticmethod\n    def get_nvg_dimensions(api_config):\n        \"\"\"Help routine to setup nevergrad search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistnent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        all_args = {}\n        all_prewarp = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            prewarp = None\n            if param_type == \"cat\":\n                assert param_space is None\n                assert param_range is None\n                arg = inst.var.SoftmaxCategorical(param_values)\n            elif param_type == \"bool\":\n                assert param_space is None\n                assert param_range is None\n                assert param_values is None\n                arg = inst.var.OrderedDiscrete([False, True])\n            elif param_values is not None:\n                assert param_type in (\"int\", \"ordinal\", \"real\")\n                arg = inst.var.OrderedDiscrete(param_values)\n                # We are throwing away information here, but OrderedDiscrete\n                # appears to be invariant to monotonic transformation anyway.\n  
          elif param_type == \"int\":\n                assert param_values is None\n                # Need +1 since API in inclusive\n                choices = range(int(param_range[0]), int(param_range[-1]) + 1)\n                arg = inst.var.OrderedDiscrete(choices)\n                # We are throwing away information here, but OrderedDiscrete\n                # appears to be invariant to monotonic transformation anyway.\n            elif param_type == \"real\":\n                assert param_values is None\n                assert param_range is not None\n                # Will need to warp to this space sep.\n                arg = inst.var.Gaussian(mean=0, std=1)\n                prewarp = Real(warp=param_space, range_=param_range)\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n\n            all_args[param_name] = arg\n            all_prewarp[param_name] = prewarp\n        instrum = inst.Instrumentation(**all_args)\n        return instrum, all_prewarp\n\n    def prewarp(self, xx):\n        \"\"\"Extra work needed to get variables into the Gaussian space\n        representation.\"\"\"\n        xxw = {}\n        for arg_name, vv in xx.items():\n            assert np.isscalar(vv)\n            space = self.space[arg_name]\n\n            if space is not None:\n                # Warp so we think it is apriori uniform in [a, b]\n                vv = space.warp(vv)\n                assert vv.size == 1\n\n                # Now make uniform on [0, 1], also unpack warped to scalar\n                (lb, ub), = space.get_bounds()\n                vv = linear_rescale(vv.item(), lb, ub, 0, 1)\n\n                # Now make std Gaussian apriori\n                vv = norm.ppf(vv)\n            assert np.isscalar(vv)\n            xxw[arg_name] = vv\n        return xxw\n\n    def postwarp(self, xxw):\n        \"\"\"Extra work needed to undo the Gaussian space representation.\"\"\"\n        xx = {}\n        for arg_name, vv in 
xxw.items():\n            assert np.isscalar(vv)\n            space = self.space[arg_name]\n\n            if space is not None:\n                # Now make std Gaussian apriori\n                vv = norm.cdf(vv)\n\n                # Now make uniform on [0, 1]\n                (lb, ub), = space.get_bounds()\n                vv = linear_rescale(vv, 0, 1, lb, ub)\n\n                # Warp so we think it is apriori uniform in [a, b]\n                vv = space.unwarp([vv])\n            assert np.isscalar(vv)\n            xx[arg_name] = vv\n        return xx\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get suggestion from nevergrad.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        x_guess_data = [self.optim.ask() for _ in range(n_suggestions)]\n\n        x_guess = [None] * n_suggestions\n        for ii, xx in enumerate(x_guess_data):\n            x_pos, x_kwarg = self.instrum.data_to_arguments(xx)\n            assert x_pos == ()\n            x_guess[ii] = self.postwarp(x_kwarg)\n\n        return x_guess\n\n    def observe(self, X, y):\n        \"\"\"Feed an observation back to nevergrad.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        for xx, yy in zip(X, y):\n            xx = self.prewarp(xx)\n            xx = 
self.instrum.arguments_to_data(**xx)\n            self.optim.tell(xx, yy)\n\n\nopt_wrapper = NevergradOptimizer\n"
  },
  {
    "path": "bayesmark/builtin_opt/opentuner_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nIn opentuner, many search techniques are already available. All the names of\nthe techniques can be found as follows:\n```\n>>> import opentuner\n>>> techniques, generators = opentuner.search.technique.all_techniques()\n>>> for t in techniques:\n...     print t.name\n```\nA user can also create new search techniques\n(http://opentuner.org/tutorial/techniques/).\n\nOpentuner will create a multi-arm bandit of multiple techniques if more than\none technique is specified in `args.technique`.\n\nSome bandits with pre-defined techniques are already registered in:\n`opentuner.search.bandittechniques`\n\nBy default, we use a pre-defined bandit called `'AUCBanditMetaTechniqueA'` of 4\ntechniques:\n```\nregister(AUCBanditMetaTechnique([\n        differentialevolution.DifferentialEvolutionAlt(),\n        evolutionarytechniques.UniformGreedyMutation(),\n        evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3),\n        simplextechniques.RandomNelderMead()],\n        name='AUCBanditMetaTechniqueA'))\n```\nThe other two bandits used in our experiments are: PSO_GA_DE and PSO_GA_Bandit.\nSpecifying a list of multiple techniques will use a multi-arm bandit over them.\n\"\"\"\nimport warnings\nfrom argparse import Namespace\n\nimport opentuner.tuningrunmain\nfrom opentuner.api import TuningRunManager\nfrom opentuner.measurement.interface import 
DefaultMeasurementInterface as DMI\nfrom opentuner.resultsdb.models import DesiredResult, Result\nfrom opentuner.search.manipulator import (\n    ConfigurationManipulator,\n    EnumParameter,\n    FloatParameter,\n    IntegerParameter,\n    LogFloatParameter,\n    LogIntegerParameter,\n    ScaledNumericParameter,\n)\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.np_util import clip_chk\n\nDEFAULT_TECHNIQUES = (\"AUCBanditMetaTechniqueA\",)\nMEMORY_ONLY_DB = \"sqlite://\"\n\n# Monkey patch here! Opentuner is messed up, TuningRunMain changes global log\n# settings. We should file an issue report here and have them fix it.\nopentuner.tuningrunmain.init_logging = lambda: None\n\n\ndef ClippedParam(cls, epsilon=1e-5):\n    \"\"\"Build wrapper class of opentuner parameter class that use clip check to\n    keep parameters in the allowed range despite numerical errors.\n\n    Class built on `ScaledNumericParameter` abstract class defined in:\n    `opentuner.search.manipulator.ScaledNumericParameter`.\n\n    Parameters\n    ----------\n    cls : ScaledNumericParameter\n        Opentuner parameter class, such as `LogFloatParameter` or\n        `FloatParameter`, which transforms the domain of parameter.\n\n    Returns\n    -------\n    StableClass : ScaledNumericParameter\n        New class equivalent to original `cls` but it overwrites the original\n        `_unscale` method to enforce a clip check to keep the parameters within\n        their allowed range.\n    \"\"\"\n    assert issubclass(\n        cls, ScaledNumericParameter\n    ), \"this class cls should inherit from the ScaledNumericParameter class\"\n\n    class StableClass(cls):\n        def _unscale(self, v):\n            unscaled_v = super(StableClass, self)._unscale(v)\n            unscaled_v = clip_chk(unscaled_v, self.min_value, self.max_value)\n            return unscaled_v\n\n    return StableClass\n\n\nclass OpentunerOptimizer(AbstractOptimizer):\n    primary_import = 
\"opentuner\"\n\n    def __init__(self, api_config, techniques=DEFAULT_TECHNIQUES, n_suggestions=1):\n        \"\"\"Build wrapper class to use opentuner optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n\n        techniques : iterable of strings\n            A list or tuple of techniques to use in opentuner. If the list\n            has only one technique, then that technique will be used. If the\n            list has multiple techniques a bandit over those techniques\n            will be used.\n\n        n_suggestions : int\n            Default number of suggestions to be made in parallel.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        # Opentuner requires DesiredResult to reference suggestion when making\n        # its observation. x_to_dr maps the dict suggestion to DesiredResult.\n        self.x_to_dr = {}\n        # Keep last suggested x and repeat it whenever opentuner gives up.\n        self.dummy_suggest = None\n\n        \"\"\"Setting up the arguments for opentuner. 
You can see all possible\n        arguments using:\n        ```\n        >>> import opentuner\n        >>> opentuner.default_argparser().parse_args(['-h'])\n        ```\n        We only change a few arguments (other arguments are set to defaults):\n        * database = MEMORY_ONLY_DB: to use an in-memory sqlite database\n        * parallelism = n_suggestions: num of suggestions to give in parallel\n        * technique = techniques: a list of techniques to be used by opentuner\n        * print_params = False: to avoid opentuner from exiting after printing\n            param spaces\n        \"\"\"\n        args = Namespace(\n            bail_threshold=500,\n            database=MEMORY_ONLY_DB,\n            display_frequency=10,\n            generate_bandit_technique=False,\n            label=None,\n            list_techniques=False,\n            machine_class=None,\n            no_dups=False,\n            parallel_compile=False,\n            parallelism=n_suggestions,\n            pipelining=0,\n            print_params=False,\n            print_search_space_size=False,\n            quiet=False,\n            results_log=None,\n            results_log_details=None,\n            seed_configuration=[],\n            stop_after=None,\n            technique=techniques,\n            test_limit=5000,\n        )\n\n        # Setup some dummy classes required by opentuner to actually run.\n        manipulator = OpentunerOptimizer.build_manipulator(api_config)\n        interface = DMI(args=args, manipulator=manipulator)\n        self.api = TuningRunManager(interface, args)\n\n    @staticmethod\n    def hashable_dict(d):\n        \"\"\"A custom function for hashing dictionaries.\n\n        Parameters\n        ----------\n        d : dict or dict-like\n            The dictionary to be converted to immutable/hashable type.\n\n        Returns\n        -------\n        hashable_object : frozenset of tuple pairs\n            Bijective equivalent to dict that can be hashed.\n        
\"\"\"\n        hashable_object = frozenset(d.items())\n        return hashable_object\n\n    @staticmethod\n    def build_manipulator(api_config):\n        \"\"\"Build a ConfigurationManipulator object to be used by opentuner.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n\n        Returns\n        -------\n        manipulator : ConfigurationManipulator\n            Some over complexified class required by opentuner to run.\n        \"\"\"\n        manipulator = ConfigurationManipulator()\n\n        for pname in api_config:\n            ptype = api_config[pname][\"type\"]\n            pspace = api_config[pname].get(\"space\", None)\n            pmin, pmax = api_config[pname].get(\"range\", (None, None))\n\n            if ptype == \"real\":\n                if pspace in (\"linear\", \"logit\"):\n                    ot_param = FloatParameter(pname, pmin, pmax)\n                elif pspace in (\"log\", \"bilog\"):\n                    LogFloatParameter_ = ClippedParam(LogFloatParameter)\n                    ot_param = LogFloatParameter_(pname, pmin, pmax)\n                else:\n                    assert False, \"unsupported param space = %s\" % pspace\n            elif ptype == \"int\":\n                if pspace in (\"linear\", \"logit\"):\n                    ot_param = IntegerParameter(pname, pmin, pmax)\n                elif pspace in (\"log\", \"bilog\"):\n                    ot_param = LogIntegerParameter(pname, pmin, pmax)\n                else:\n                    assert False, \"unsupported param space = %s\" % pspace\n            elif ptype == \"bool\":\n                # The actual bool parameter seems not to work in Py3 :(\n                ot_param = IntegerParameter(pname, 0, 1)\n            elif ptype in (\"cat\", \"ordinal\"):\n                # Treat ordinal and categorical variables the same for now.\n                assert 
\"values\" in api_config[pname]\n                pvalues = api_config[pname][\"values\"]\n                ot_param = EnumParameter(pname, pvalues)\n            else:\n                assert False, \"type=%s/space=%s not handled in opentuner yet\" % (ptype, pspace)\n            manipulator.add_parameter(ot_param)\n        return manipulator\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Make `n_suggestions` suggestions for what to evaluate next.\n\n        This requires the user observe all previous suggestions before calling\n        again.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            The number of suggestions to return.\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        assert n_suggestions >= 1, \"invalid value for n_suggestions\"\n\n        # Update the n_suggestions if it is different from the current setting.\n        if self.api.search_driver.args.parallelism != n_suggestions:\n            self.api.search_driver.args.parallelism = n_suggestions\n            warnings.warn(\"n_suggestions changed across suggest calls\")\n\n        # Require the user to already observe all previous suggestions.\n        # Otherwise, opentuner will just recycle old suggestions.\n        assert len(self.x_to_dr) == 0, \"all the previous suggestions should have been observed by now\"\n\n        # The real meat of suggest from opentuner: Get next `n_suggestions`\n        # unique suggestions.\n        desired_results = [self.api.get_next_desired_result() for _ in range(n_suggestions)]\n\n        # Save DesiredResult object in dict since observe will need it.\n        X = []\n        using_dummy_suggest = False\n        for ii in range(n_suggestions):\n            # Opentuner can give up, but 
the API requires guessing forever.\n            if desired_results[ii] is None:\n                assert self.dummy_suggest is not None, \"opentuner gave up on the first call!\"\n                # Use the dummy suggestion in this case.\n                X.append(self.dummy_suggest)\n                using_dummy_suggest = True\n                continue\n\n            # Get the simple dict equivalent to suggestion.\n            x_guess = desired_results[ii].configuration.data\n            X.append(x_guess)\n\n            # Now save the desired result for future use in observe.\n            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)\n            assert x_guess_ not in self.x_to_dr, \"the suggestions should not already be in the x_to_dr dict\"\n            self.x_to_dr[x_guess_] = desired_results[ii]\n            # This will also catch None from opentuner.\n            assert isinstance(self.x_to_dr[x_guess_], DesiredResult)\n\n        assert len(X) == n_suggestions, \"incorrect number of suggestions provided by opentuner\"\n        # Log suggestion for repeating if opentuner gives up next time. 
We can\n        # only do this when it is not already being used since we will be\n        # checking guesses against dummy_suggest in observe.\n        if not using_dummy_suggest:\n            self.dummy_suggest = X[-1]\n        return X\n\n    def observe(self, X, y):\n        \"\"\"Feed the observations back to opentuner.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated.\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_guess, y_ in zip(X, y):\n            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)\n\n            # If we can't find the dr object then it must be the dummy guess.\n            if x_guess_ not in self.x_to_dr:\n                assert x_guess == self.dummy_suggest, \"Appears to be guess that did not originate from suggest\"\n                continue\n\n            # Get the corresponding DesiredResult object.\n            dr = self.x_to_dr.pop(x_guess_, None)\n            # This will also catch None from opentuner.\n            assert isinstance(dr, DesiredResult), \"DesiredResult object not available in x_to_dr\"\n\n            # Opentuner's arg names assume we are minimizing execution time.\n            # So, if we want to minimize we have to pretend y is a 'time'.\n            result = Result(time=y_)\n            self.api.report_result(dr, result)\n\n\nopt_wrapper = OpentunerOptimizer\n"
  },
  {
    "path": "bayesmark/builtin_opt/pysot_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport warnings\nfrom copy import copy\n\nimport numpy as np\nfrom poap.strategy import EvalRecord\nfrom pySOT.experimental_design import SymmetricLatinHypercube\nfrom pySOT.optimization_problems import OptimizationProblem\nfrom pySOT.strategy import SRBFStrategy\nfrom pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.space import JointSpace\n\n\nclass PySOTOptimizer(AbstractOptimizer):\n    primary_import = \"pysot\"\n\n    def __init__(self, api_config):\n        \"\"\"Build wrapper class to use an optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. 
See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        self.space_x = JointSpace(api_config)\n        self.bounds = self.space_x.get_bounds()\n        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)\n        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int\n        self.batch_size = None\n        self.history = []\n        self.proposals = []\n\n    def create_opt_prob(self):\n        \"\"\"Create an optimization problem object.\"\"\"\n        opt = OptimizationProblem()\n        opt.lb = self.bounds[:, 0]  # In warped space\n        opt.ub = self.bounds[:, 1]  # In warped space\n        opt.dim = len(self.bounds)\n        opt.cont_var = np.arange(len(self.bounds))\n        opt.int_var = []\n        assert len(opt.cont_var) + len(opt.int_var) == opt.dim\n        opt.objfun = None\n        self.opt = opt\n\n    def start(self, max_evals):\n        \"\"\"Starts a new pySOT run.\"\"\"\n        self.history = []\n        self.proposals = []\n\n        # Symmetric Latin hypercube design\n        des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])\n        slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)\n\n        # Warped RBF interpolant\n        rbf = RBFInterpolant(\n            dim=self.opt.dim,\n            lb=self.opt.lb,\n            ub=self.opt.ub,\n            kernel=CubicKernel(),\n            tail=LinearTail(self.opt.dim),\n            eta=1e-4,\n        )\n\n        # Optimization strategy\n        self.strategy = SRBFStrategy(\n            max_evals=self.max_evals,\n            opt_prob=self.opt,\n            exp_design=slhd,\n            surrogate=rbf,\n            asynchronous=True,\n            batch_size=1,\n            use_restarts=True,\n        )\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get a suggestion from the optimizer.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired 
number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n\n        if self.batch_size is None:  # First call to suggest\n            self.batch_size = n_suggestions\n            self.start(self.max_evals)\n\n        # Set the tolerances pretending like we are running batch\n        d, p = float(self.opt.dim), float(n_suggestions)\n        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))\n\n        # Now we can make suggestions\n        x_w = []\n        self.proposals = []\n        for _ in range(n_suggestions):\n            proposal = self.strategy.propose_action()\n            record = EvalRecord(proposal.args, status=\"pending\")\n            proposal.record = record\n            proposal.accept()  # This triggers all the callbacks\n\n            # It is possible that pySOT proposes a previously evaluated point\n            # when all variables are integers, so we just abort in this case\n            # since we have likely converged anyway. 
See PySOT issue #30.\n            x = list(proposal.record.params)  # From tuple to list\n            x_unwarped, = self.space_x.unwarp(x)\n            if x_unwarped in self.history:\n                warnings.warn(\"pySOT proposed the same point twice\")\n                self.start(self.max_evals)\n                return self.suggest(n_suggestions=n_suggestions)\n\n            # NOTE: Append unwarped to avoid rounding issues\n            self.history.append(copy(x_unwarped))\n            self.proposals.append(proposal)\n            x_w.append(copy(x_unwarped))\n\n        return x_w\n\n    def _observe(self, x, y):\n        # Find the matching proposal and execute its callbacks\n        idx = [x == xx for xx in self.history]\n        i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties\n        proposal = self.proposals[i]\n        proposal.record.complete(y)\n        self.proposals.pop(i)\n        self.history.pop(i)\n\n    def observe(self, X, y):\n        \"\"\"Send an observation of a suggestion back to the optimizer.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_, y_ in zip(X, y):\n            # Just ignore, any inf observations we got, unclear if right thing\n            if np.isfinite(y_):\n                self._observe(x_, y_)\n\n\nopt_wrapper = PySOTOptimizer\n"
  },
  {
    "path": "bayesmark/builtin_opt/random_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport bayesmark.random_search as rs\nfrom bayesmark import np_util\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\n\n\nclass RandomOptimizer(AbstractOptimizer):\n    # Unclear what is best package to list for primary_import here.\n    primary_import = \"bayesmark\"\n\n    def __init__(self, api_config, random=np_util.random):\n        \"\"\"Build wrapper class to use random search function in benchmark.\n\n        Settings for `suggest_dict` can be passed using kwargs.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get suggestion.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n        return x_guess\n\n    def observe(self, X, y):\n        \"\"\"Feed an observation back.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        # Random search so don't do anything\n        pass\n\n\n# All optimizer wrappers need to assign their wrapper to the name opt_wrapper because experiment always tries to import\n# opt_wrapper regardless of the optimizer it is importing.\nopt_wrapper = RandomOptimizer\n"
  },
  {
    "path": "bayesmark/builtin_opt/scikit_optimizer.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom scipy.interpolate import interp1d\nfrom skopt import Optimizer as SkOpt\nfrom skopt.space import Categorical, Integer, Real\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\n\n\nclass ScikitOptimizer(AbstractOptimizer):\n    primary_import = \"scikit-optimize\"\n\n    def __init__(self, api_config, base_estimator=\"GP\", acq_func=\"gp_hedge\", n_initial_points=5, **kwargs):\n        \"\"\"Build wrapper class to use an optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        base_estimator : {'GP', 'RF', 'ET', 'GBRT'}\n            How to estimate the objective function.\n        acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}\n            Acquisition objective to decide next suggestion.\n        n_initial_points : int\n            Number of points to sample randomly before actual Bayes opt.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config)\n\n        # Older versions of skopt don't copy over the dimensions names during\n        # normalization and hence the names are missing in\n        # self.skopt.space.dimensions. 
Therefore, we save our own copy of\n        # dimensions list to be safe. If we can commit to using the newer\n        # versions of skopt we can delete self.dimensions_list.\n        self.dimensions_list = tuple(dd.name for dd in dimensions)\n\n        # Undecided where we want to pass the kwargs, so for now just make sure\n        # they are blank\n        assert len(kwargs) == 0\n\n        self.skopt = SkOpt(\n            dimensions,\n            n_initial_points=n_initial_points,\n            base_estimator=base_estimator,\n            acq_func=acq_func,\n            acq_optimizer=\"auto\",\n            acq_func_kwargs={},\n            acq_optimizer_kwargs={},\n        )\n\n    @staticmethod\n    def get_sk_dimensions(api_config, transform=\"normalize\"):\n        \"\"\"Help routine to setup skopt search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        sk_dims = []\n        round_to_values = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            # Some setup for case that whitelist of values is provided:\n            values_only_type = param_type in (\"cat\", \"ordinal\")\n            if (param_values is not None) and (not values_only_type):\n                assert param_range is None\n                param_values = np.unique(param_values)\n                param_range = (param_values[0], param_values[-1])\n                round_to_values[param_name] = interp1d(\n                    param_values, param_values, kind=\"nearest\", 
fill_value=\"extrapolate\"\n                )\n\n            if param_type == \"int\":\n                # Integer space in sklearn does not support any warping => Need\n                # to leave the warping as linear in skopt.\n                sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name))\n            elif param_type == \"bool\":\n                assert param_range is None\n                assert param_values is None\n                sk_dims.append(Integer(0, 1, transform=transform, name=param_name))\n            elif param_type in (\"cat\", \"ordinal\"):\n                assert param_range is None\n                # Leave x-form to one-hot as per skopt default\n                sk_dims.append(Categorical(param_values, name=param_name))\n            elif param_type == \"real\":\n                # Skopt doesn't support all our warpings, so need to pick\n                # closest substitute it does support.\n                prior = \"log-uniform\" if param_space in (\"log\", \"logit\") else \"uniform\"\n                sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name))\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n        return sk_dims, round_to_values\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get a suggestion from the optimizer.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        # First get list of lists from skopt.ask()\n        next_guess = self.skopt.ask(n_points=n_suggestions)\n        # Then convert to list of dicts\n        next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]\n\n        # Now do the rounding, custom rounding is not supported in skopt. Note\n        # that there is not nec a round function for each dimension here.\n        for param_name, round_f in self.round_to_values.items():\n            for xx in next_guess:\n                xx[param_name] = round_f(xx[param_name])\n        return next_guess\n\n    def observe(self, X, y):\n        \"\"\"Send an observation of a suggestion back to the optimizer.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        # Supposedly skopt can handle blocks, but not sure about interface for\n        # that. Just do loop to be safe for now.\n        for xx, yy in zip(X, y):\n            # skopt needs lists instead of dicts\n            xx = [xx[dim_name] for dim_name in self.dimensions_list]\n            # Just ignore, any inf observations we got, unclear if right thing\n            if np.isfinite(yy):\n                self.skopt.tell(xx, yy)\n\n\nopt_wrapper = ScikitOptimizer\n"
  },
  {
    "path": "bayesmark/cmd_parse.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Anything related to parsing command line arguments goes in here. There are some custom data structures to\nrepresent all the options available in the experiments here.\n\nNot currently any doc strings in this file because it may become obsolete with the use of fire package.\n\"\"\"\nimport argparse\nimport json\nimport os.path\nimport sys\nimport uuid as pyuuid\nfrom enum import IntEnum, auto\nfrom pathlib import PosixPath\n\nimport git\nfrom git.exc import InvalidGitRepositoryError\nfrom pathvalidate.argparse import sanitize_filename, validate_filename, validate_filepath\n\nfrom bayesmark.builtin_opt.config import CONFIG\nfrom bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES, OPTIMIZERS_FILE, PY_INTERPRETER\nfrom bayesmark.path_util import absopen, abspath\nfrom bayesmark.util import shell_join\n\nassert not any(ARG_DELIM in opt for opt in MODEL_NAMES)\nassert not any(ARG_DELIM in opt for opt in DATA_LOADER_NAMES)\n\n\nclass CmdArgs(IntEnum):\n    uuid = auto()\n    db_root = auto()\n    optimizer_root = auto()\n    data_root = auto()\n    db = auto()\n    optimizer = auto()\n    data = auto()\n    classifier = auto()\n    metric = auto()\n    n_calls = auto()\n    n_suggest = auto()\n    n_repeat = auto()\n    n_jobs = auto()\n    jobs_file = auto()\n    ravel = auto()\n    verbose = auto()\n    dry_run = auto()\n    
rev = auto()\n    opt_rev = auto()\n    timeout = auto()\n\n\nCMD_STR = {\n    CmdArgs.uuid: (\"-u\", \"--uuid\"),\n    CmdArgs.db_root: (\"-dir\", \"-db-root\"),\n    CmdArgs.optimizer_root: (\"-odir\", \"--opt-root\"),\n    CmdArgs.data_root: (\"-dr\", \"--data-root\"),\n    CmdArgs.db: (\"-b\", \"--db\"),\n    CmdArgs.optimizer: (\"-o\", \"--opt\"),\n    CmdArgs.data: (\"-d\", \"--data\"),\n    CmdArgs.classifier: (\"-c\", \"--classifier\"),\n    CmdArgs.metric: (\"-m\", \"--metric\"),\n    CmdArgs.n_calls: (\"-n\", \"--calls\"),\n    CmdArgs.n_suggest: (\"-p\", \"--suggestions\"),\n    CmdArgs.n_repeat: (\"-r\", \"--repeat\"),\n    CmdArgs.n_jobs: (\"-nj\", \"--num-jobs\"),\n    CmdArgs.jobs_file: (\"-ofile\", \"--jobs-file\"),\n    CmdArgs.ravel: (\"-rv\", \"--ravel\"),\n    CmdArgs.verbose: (\"-v\", \"--verbose\"),\n    CmdArgs.timeout: (\"-t\", \"--timeout\"),\n    CmdArgs.dry_run: (None, \"dry_run\"),  # Will not be specified from CLI\n    CmdArgs.rev: (None, \"rev\"),  # Will not be specified from CLI\n    CmdArgs.opt_rev: (None, \"opt_rev\"),  # Will not be specified from CLI. 
Which version of optimizer.\n}\n\n\ndef arg_to_str(arg):\n    # We can change this so it is arg.value, or someway to be usable by field interface\n    _, dest = str(arg).split(\".\")\n    return dest\n\n\ndef namespace_to_dict(args_ns):\n    args = vars(args_ns)\n    args = {kk: args[arg_to_str(kk)] for kk in CMD_STR if (arg_to_str(kk) in args)}\n    return args\n\n\ndef serializable_dict(args):\n    args_str = {CMD_STR[kk][1]: args[kk] for kk in CMD_STR if (kk in args)}\n    assert len(args_str) == len(args)\n    return args_str\n\n\ndef unserializable_dict(args_str):\n    args = {kk: args_str[CMD_STR[kk][1]] for kk in CMD_STR if (CMD_STR[kk][1] in args_str)}\n    assert len(args_str) == len(args)\n    return args\n\n\ndef add_argument(parser, arg, **kwargs):\n    short_name, long_name = CMD_STR[arg]\n    dest = arg_to_str(arg)\n    parser.add_argument(short_name, long_name, dest=dest, **kwargs)\n\n\ndef filepath(value):\n    \"\"\"Work around for `pathvalidate` bug.\"\"\"\n    if value == \".\":\n        return value\n    validate_filepath(value, platform=\"auto\")\n    return value\n\n\ndef filename(value):\n    validate_filename(value, platform=\"universal\")\n    return value\n\n\ndef uuid(val_str):\n    val = str.lower(val_str)\n    uuid_ = pyuuid.UUID(hex=val)\n    assert val == uuid_.hex, \"error in parsing uuid\"\n    return val\n\n\ndef positive_int(val_str):\n    val = int(val_str)\n    if val <= 0:\n        msg = \"expected positive, got %s\" % val_str\n        raise argparse.ArgumentTypeError(msg)\n    return val\n\n\ndef joinable(val_str):\n    val = str(val_str)  # just for good measure\n    validate_filename(val, platform=\"universal\")  # we choose to be at least as strict as filenames\n    if ARG_DELIM in val:\n        msg = \"delimiter %s not allowed in choice %s\" % (ARG_DELIM, val)\n        raise argparse.ArgumentTypeError(msg)\n    return val\n\n\ndef load_rev_number():\n    # This function uses a lot of language \"power features\" that could 
be considered bad form:\n    # 1) does a conditional import to get version\n    # 2) uses __file__ to try and extract and git repo version during execution\n    # We will let this fly anyway because:\n    # 1) The results of this are only used for logging anyway\n    # 2) This is a command parsing module of the code and inherently very non-pure and doing IO etc\n    # 3) Unclear if there is a cleaner way to do this\n\n    # Get rev from version file (if running inside the pip-installable wheel without the git repo)\n    try:\n        from bayesmark import version\n\n        rev_file = version.VERSION\n    except ImportError:\n        rev_file = None\n    else:\n        rev_file = rev_file.strip()\n        rev = rev_file\n\n    # Get rev from git API if inside git repo (and not built wheel from pip install ...)\n    wdir = os.path.abspath(os.path.join(os.path.dirname(__file__), \"..\"))\n    try:\n        repo = git.Repo(path=wdir, search_parent_directories=False)\n    except InvalidGitRepositoryError:\n        rev_repo = None\n    else:\n        rev_repo = repo.head.commit.hexsha\n        rev_repo = rev_repo.strip()\n        rev = rev_repo\n\n    # Check coherence of what we found\n    if (rev_repo is None) and (rev_file is None):\n        raise RuntimeError(\"Must specify version.py if not inside a git repo.\")\n    if (rev_repo is not None) and (rev_file is not None):\n        assert rev_repo == rev_file, \"Rev file %s does not match rev git %s\" % (rev_file, rev_repo)\n\n    assert rev == rev.strip()\n    # We could first enforce is_lower_hex if we want to enforce that\n    rev = rev[:7]\n    return rev\n\n\ndef base_parser():\n    parser = argparse.ArgumentParser(add_help=False)\n\n    add_argument(\n        parser, CmdArgs.db_root, default=\".\", type=filepath, help=\"root directory for all benchmark experiments output\"\n    )\n    add_argument(\n        parser, CmdArgs.optimizer_root, default=\".\", type=filepath, help=\"Directory with optimization 
wrappers\"\n    )\n    # Always a verbose flag option\n    add_argument(parser, CmdArgs.verbose, action=\"store_true\", help=\"print the study logs to console\")\n    return parser\n\n\ndef launcher_parser(description):\n    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])\n\n    add_argument(parser, CmdArgs.uuid, type=uuid, help=\"length 32 hex UUID for this experiment\")\n    add_argument(parser, CmdArgs.data_root, type=filepath, help=\"root directory for all custom csv files\")\n    add_argument(parser, CmdArgs.db, type=filename, help=\"database ID of this benchmark experiment\")\n\n    add_argument(parser, CmdArgs.optimizer, type=joinable, nargs=\"+\", help=\"optimizers to use\")\n    add_argument(parser, CmdArgs.data, type=joinable, nargs=\"+\", help=\"data sets to use\")\n    add_argument(parser, CmdArgs.classifier, type=joinable, nargs=\"+\", help=\"classifiers to use\")\n    add_argument(parser, CmdArgs.metric, type=str, choices=METRICS, nargs=\"+\", help=\"scoring metric to use\")\n\n    # Iterations counts used in experiments\n    add_argument(parser, CmdArgs.n_calls, default=100, type=positive_int, help=\"number of function evaluations\")\n    add_argument(\n        parser, CmdArgs.n_suggest, default=1, type=positive_int, help=\"number of suggestions to provide in parallel\"\n    )\n    add_argument(parser, CmdArgs.n_repeat, default=20, type=positive_int, help=\"number of repetitions of each study\")\n    add_argument(parser, CmdArgs.timeout, default=0, type=int, help=\"Timeout per experiment (0 = no timeout)\")\n\n    # Arguments for creating dry run jobs file\n    add_argument(\n        parser,\n        CmdArgs.n_jobs,\n        type=int,\n        default=0,\n        help=\"number of jobs to put in the dry run file, the default 0 value disables dry run (real run)\",\n    )\n    # Using default of current dir for jobs file output since that is generally the default for everything\n    add_argument(\n        parser, 
CmdArgs.jobs_file, type=filepath, default=\"./jobs.txt\", help=\"a jobs file with all commands to be run\"\n    )\n    return parser\n\n\ndef experiment_parser(description):\n    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])\n\n    add_argument(parser, CmdArgs.uuid, type=uuid, required=True, help=\"length 32 hex UUID for this experiment\")\n\n    # This could be made simpler and use '.' default for dataroot, even if no custom data used.\n    add_argument(parser, CmdArgs.data_root, type=filepath, help=\"root directory for all custom csv files\")\n    add_argument(parser, CmdArgs.db, type=filename, required=True, help=\"database ID of this benchmark experiment\")\n    add_argument(parser, CmdArgs.optimizer, required=True, type=joinable, help=\"optimizer to use\")\n    add_argument(parser, CmdArgs.data, required=True, type=joinable, help=\"data set to use\")\n    add_argument(parser, CmdArgs.classifier, required=True, type=joinable, help=\"classifier to use\")\n    add_argument(parser, CmdArgs.metric, required=True, type=str, choices=METRICS, help=\"scoring metric to use\")\n\n    add_argument(parser, CmdArgs.n_calls, default=100, type=positive_int, help=\"number of function evaluations\")\n    add_argument(\n        parser, CmdArgs.n_suggest, default=1, type=positive_int, help=\"number of suggestions to provide in parallel\"\n    )\n    return parser\n\n\ndef agg_parser(description):\n    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])\n    add_argument(parser, CmdArgs.db, type=filename, required=True, help=\"database ID of this benchmark experiment\")\n    add_argument(\n        parser,\n        CmdArgs.ravel,\n        action=\"store_true\",\n        help=\"ravel all studies to store batch suggestions as if they were serial (deprecated)\",\n    )\n    return parser\n\n\ndef general_parser(description):\n    parser = argparse.ArgumentParser(description=description, parents=[base_parser()])\n    
add_argument(parser, CmdArgs.db, type=filename, required=True, help=\"database ID of this benchmark experiment\")\n    return parser\n\n\ndef parse_args(parser, argv=None):\n    \"\"\"Note that this argument parser does not check compatibility between clf/reg metric and data set.\n    \"\"\"\n    args = parser.parse_args(argv)\n    args = namespace_to_dict(args)\n\n    args[CmdArgs.dry_run] = (CmdArgs.n_jobs in args) and (args[CmdArgs.n_jobs] > 0)\n    # Does not check dir actually exists here, but whatever\n    args[CmdArgs.jobs_file] = abspath(args[CmdArgs.jobs_file], verify=False) if args[CmdArgs.dry_run] else None\n\n    # Then make sure all path vars are abspath:\n    # Dry run might be executing on diff system => cannot verify yet\n    args[CmdArgs.db_root] = abspath(args[CmdArgs.db_root], verify=not args[CmdArgs.dry_run])\n    args[CmdArgs.optimizer_root] = abspath(args[CmdArgs.optimizer_root], verify=True)\n    if (CmdArgs.data_root in args) and (args[CmdArgs.data_root] is not None):\n        args[CmdArgs.data_root] = abspath(args[CmdArgs.data_root], verify=not args[CmdArgs.dry_run])\n\n    # Get git version of the benchmark itself for meta-data, just in case we need it.\n    args[CmdArgs.rev] = load_rev_number()\n\n    # We may support ability to specify version at args in the future, from now it is implied\n    args[CmdArgs.opt_rev] = None\n    return args\n\n\ndef _cleanup(filename_str):\n    filename_str = sanitize_filename(filename_str, replacement_text=\"-\", platform=\"universal\")\n    filename_str = filename_str.replace(ARG_DELIM, \"-\")\n    return filename_str\n\n\ndef infer_settings(opt_root, opt_pattern=\"**/optimizer.py\"):\n    opt_root = PosixPath(opt_root)\n    assert opt_root.is_dir(), \"Opt root directory doesn't exist: %s\" % opt_root\n    assert opt_root.is_absolute(), \"Only absolute path should have even gotten this far.\"\n\n    # Always sort for reproducibility\n    source_files = sorted(opt_root.glob(opt_pattern))\n    source_files 
= [ss.relative_to(opt_root) for ss in source_files]\n\n    settings = {_cleanup(str(ss.parent)): [str(ss), {}] for ss in source_files}\n\n    assert all(joinable(kk) for kk in settings), \"Something went wrong in name sanitization.\"\n    assert len(settings) == len(source_files), \"Name collision after sanitization of %s\" % repr(source_files)\n    assert len(set(CONFIG.keys()) & set(settings.keys())) == 0, \"Name collision with builtin optimizers.\"\n\n    return settings\n\n\ndef load_optimizer_settings(opt_root):\n    try:\n        with absopen(os.path.join(opt_root, OPTIMIZERS_FILE), \"r\") as f:\n            settings = json.load(f)\n    except FileNotFoundError:\n        # Search for optimizers instead\n        settings = infer_settings(opt_root)\n\n    assert isinstance(settings, dict)\n    assert not any((ARG_DELIM in opt) for opt in settings), \"optimizer names violates name convention\"\n    return settings\n\n\ndef cmd_str():\n    cmd = \"%s %s\" % (PY_INTERPRETER, shell_join(sys.argv))\n    return cmd\n"
  },
  {
    "path": "bayesmark/constants.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"General constants that are used in multiple files in the code base.\n\"\"\"\n\n# Special constant for random search since it gets used as our reference point in the baselines\nRANDOM_SEARCH = \"RandomSearch\"\nOPTIMIZERS_FILE = \"config.json\"\nARG_DELIM = \"_\"  # Delimiter used when concatenating cmd arguments for any reason\nPY_INTERPRETER = \"python\"  # What command to call for sub process, we could specify version number here also.\n\n# Variables to save in SAL\nEVAL = \"eval\"\nTIME = \"time\"\nSUGGEST_LOG = \"suggest_log\"\nEXP_VARS = (EVAL, TIME, SUGGEST_LOG)\n\n# Derived variables to save in SAL\nTIME_RESULTS = \"time\"\nEVAL_RESULTS = \"eval\"\nBASELINE = \"baseline\"\nPERF_RESULTS = \"perf\"\nMEAN_SCORE = \"summary\"\n\n# Coordinate dim names needed in saved xr Datasets\nITER = \"iter\"\nTEST_CASE = \"function\"\nMETHOD = \"optimizer\"\nTRIAL = \"study_id\"\nSUGGEST = \"suggestion\"\nOBJECTIVE = \"objective\"\n\n# Dataset variables for eval results\nVISIBLE_TO_OPT = \"_visible_to_opt\"\n\n# Dataset variables for time results\nSUGGEST_PHASE = \"suggest\"\nOBS_PHASE = \"observe\"\nEVAL_PHASE = \"eval\"\nEVAL_PHASE_SUM = \"eval_sum\"\nEVAL_PHASE_MAX = \"eval_max\"\n\n# Dataset variables for aggregate results\nPERF_MED = \"median\"\nLB_MED = \"median LB\"\nUB_MED = \"median UB\"\nNORMED_MED = \"median normed\"\nPERF_MEAN = \"mean\"\nLB_MEAN = 
\"mean LB\"\nUB_MEAN = \"mean UB\"\nNORMED_MEAN = \"mean normed\"\nLB_NORMED_MEAN = \"mean normed LB\"\nUB_NORMED_MEAN = \"mean normed UB\"\nPERF_BEST = \"best\"\nPERF_CLIP = \"clip\"\n\n# Choices used for test problems, there is some redundant specification with sklearn funcs file here\nMODEL_NAMES = (\"DT\", \"MLP-adam\", \"MLP-sgd\", \"RF\", \"SVM\", \"ada\", \"kNN\", \"lasso\", \"linear\")\nDATA_LOADER_NAMES = (\"breast\", \"digits\", \"iris\", \"wine\", \"boston\", \"diabetes\")\n\nSCORERS_CLF = (\"nll\", \"acc\")\nSCORERS_REG = (\"mae\", \"mse\")\nMETRICS = tuple(sorted(SCORERS_CLF + SCORERS_REG))\n"
  },
  {
    "path": "bayesmark/data.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Module to deal with all matters relating to loading example data sets, which we tune ML models to.\n\"\"\"\nfrom enum import IntEnum, auto\n\nimport numpy as np\nimport pandas as pd  # only needed for csv reader, maybe try something else\nfrom sklearn import datasets\n\nfrom bayesmark.constants import DATA_LOADER_NAMES, SCORERS_CLF, SCORERS_REG\nfrom bayesmark.path_util import join_safe_r\nfrom bayesmark.stats import robust_standardize\n\n\nclass ProblemType(IntEnum):\n    \"\"\"The different problem types we consider. 
Currently, just regression (`reg`) and classification (`clf`).\n    \"\"\"\n\n    clf = auto()\n    reg = auto()\n\n\nDATA_LOADERS = {\n    \"digits\": (datasets.load_digits, ProblemType.clf),\n    \"iris\": (datasets.load_iris, ProblemType.clf),\n    \"wine\": (datasets.load_wine, ProblemType.clf),\n    \"breast\": (datasets.load_breast_cancer, ProblemType.clf),\n    \"boston\": (datasets.load_boston, ProblemType.reg),\n    \"diabetes\": (datasets.load_diabetes, ProblemType.reg),\n}\n\nassert sorted(DATA_LOADERS.keys()) == sorted(DATA_LOADER_NAMES)\n\n# Arguably, this could go in constants, but doesn't cause extra imports being here.\nMETRICS_LOOKUP = {ProblemType.clf: SCORERS_CLF, ProblemType.reg: SCORERS_REG}\n\n\ndef get_problem_type(dataset_name):\n    \"\"\"Determine if this dataset is a regression or classification problem.\n\n    Parameters\n    ----------\n    dataset_name : str\n        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.\n\n    Returns\n    -------\n    problem_type : ProblemType\n        `Enum` to indicate if regression or classification data set.\n    \"\"\"\n    if dataset_name in DATA_LOADERS:\n        _, problem_type = DATA_LOADERS[dataset_name]\n        return problem_type\n\n    # Maybe we can come up with a better system, but for now let's use a convention based on the naming of the csv file.\n    if dataset_name.startswith(\"reg-\"):\n        return ProblemType.reg\n    if dataset_name.startswith(\"clf-\"):\n        return ProblemType.clf\n    assert False, \"Can't determine problem type from dataset name.\"\n\n\ndef _csv_loader(dataset_name, return_X_y, data_root, clip_x=100):  # pragma: io\n    \"\"\"Load custom csv files for use in the benchmark.\n\n    This function assumes ``dataset_name + \".csv\"`` is a csv file found in the `data_root` path.  
It also assumes the\n    last column of the csv file is the target and the other columns are features.\n\n    The target column should be `int` for classification and `float` for regression. Column names ending in ``\"_cat\"``\n    are assumed to be categorical and will be one-hot encoded.\n\n    The features (and target for regression) are robust standardized. The features are also clipped to be in\n    ``[-clip_x, clip_x]`` *after* standardization.\n    \"\"\"\n    assert return_X_y, \"Only returning (X,y) tuple supported right now.\"\n    assert clip_x >= 0\n\n    # Quantile range for robust standardization. The 86% range is the most efficient for Gaussians. See:\n    # https://github.com/scikit-learn/scikit-learn/issues/10139#issuecomment-344705040\n    q_level = 0.86\n\n    path = join_safe_r(data_root, dataset_name + \".csv\")\n\n    # For now, use convention that can get problem type based on data set name\n    problem_type = get_problem_type(dataset_name)\n\n    # Assuming no missing data in source csv files at the moment, these will\n    # result in error.\n    df = pd.read_csv(\n        path, header=0, index_col=False, engine=\"c\", na_filter=False, true_values=[\"true\"], false_values=[\"false\"]\n    )\n\n    label = df.columns[-1]  # Assume last col is target\n\n    target = df.pop(label).values\n    if problem_type == ProblemType.clf:\n        assert target.dtype in (np.bool_, np.int_)\n        target = target.astype(np.int_)  # convert to int for skl\n    if problem_type == ProblemType.reg:\n        assert target.dtype == np.float_\n        # 86% range is the most efficient (at least for Gaussians)\n        target = robust_standardize(target, q_level=q_level)\n\n    # Fill in any categorical variables (object dtype or col names ending in _cat)\n    cat_cols = sorted(cc for cc in df.columns if cc.endswith(\"_cat\") or df[cc].dtype.kind == \"O\")\n    df = pd.get_dummies(df, columns=cat_cols, drop_first=True, dtype=np.float_)\n    # Could also sort all 
columns to be sure it will be reproducible\n\n    # Everything should now be in float\n    assert (df.dtypes == np.float_).all()\n\n    data = df.values\n    data = robust_standardize(data, q_level=q_level)\n    # Debatable if we should include this, but there are a lot of outliers\n    data = np.clip(data, -clip_x, clip_x)\n\n    # We should probably do some logging or something to wrap up\n    return data, target, problem_type\n\n\ndef load_data(dataset_name, data_root=None):  # pragma: io\n    \"\"\"Load a data set and return it, pre-processed into numpy arrays.\n\n    Parameters\n    ----------\n    dataset_name : str\n        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.\n    data_root : str\n        Root directory to look for all custom csv files. May be ``None`` for sklearn data sets.\n\n    Returns\n    -------\n    data : :class:`numpy:numpy.ndarray` of shape (n, d)\n        The feature matrix of the data set. It will be a `float` array.\n    target : :class:`numpy:numpy.ndarray` of shape (n,)\n        The target vector for the problem, which is `int` for classification and `float` for regression.\n    problem_type : :class:`bayesmark.data.ProblemType`\n        `Enum` to indicate if regression or classification data set.\n    \"\"\"\n    if dataset_name in DATA_LOADERS:\n        loader_f, problem_type = DATA_LOADERS[dataset_name]\n        data, target = loader_f(return_X_y=True)\n    else:  # try to load as custom csv\n        assert data_root is not None, \"data root cannot be None when custom csv requested.\"\n        data, target, problem_type = _csv_loader(dataset_name, return_X_y=True, data_root=data_root)\n    return data, target, problem_type\n"
  },
  {
    "path": "bayesmark/expected_max.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Compute expected maximum or minimum from iid samples.\n\"\"\"\nimport numpy as np\nfrom scipy.special import gammaln, logsumexp\n\n\ndef get_expected_max_weights(n, m):\n    \"\"\"Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set.\n\n    Parameters\n    ----------\n    n : int\n        Number of data points in data set ``len(x)``. Must be ``>= 1``.\n    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`\n        This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be\n        broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that\n        case.\n\n    Returns\n    -------\n    pdf : :class:`numpy:numpy.ndarray`, shape (n,)\n        The weights for L-estimator. 
Will be positive and sum to one.\n    \"\"\"\n    assert np.ndim(n) == 0\n    assert n >= 1  # otherwise makes no sense\n\n    m = np.asarray(m)  # Must be np type for broadcasting\n    # We could also check dtype is int, but not bothering here\n    assert np.all(m >= 1)  # otherwise makes no sense\n    m = m[..., None]\n\n    kk = 1 + np.arange(n)\n    lpdf = gammaln(kk) - gammaln(kk - (m - 1))\n    pdf = np.exp(lpdf - logsumexp(lpdf, axis=-1, keepdims=True))\n    # expect nan for m > n\n    assert np.all((m > n) | np.isclose(np.sum(pdf, axis=-1, keepdims=True), 1.0))\n    return pdf\n\n\ndef expected_max(x, m):\n    \"\"\"Compute unbiased estimator of expected ``max(x[1:m])`` on a data set.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data set we would like expected ``max(x[1:m])`` on.\n    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`\n        This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be\n        broadcasted. 
If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that\n        case.\n\n    Returns\n    -------\n    E_max_x : float\n        Unbiased estimate of mean max of `m` draws from distribution on `x`.\n    \"\"\"\n    assert np.ndim(x) == 1\n    # m is validated by get_expected_max_weights\n\n    # Get order stats for L-estimator\n    x = np.array(x, copy=True)  # we will modify in place\n    x.sort()  # in place!!\n\n    # Now get estimator weights\n    n, = x.shape\n    if n == 0:\n        return np.full(np.shape(m), np.nan)\n    pdf = get_expected_max_weights(n, m)\n\n    # Compute L-estimator\n    E_max_x = np.sum(x * pdf, axis=-1)\n    return E_max_x\n\n\ndef expected_min(x, m):\n    \"\"\"Compute unbiased estimator of expected ``min(x[1:m])`` on a data set.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``.\n    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`\n        This function is for estimating the expected minimum over `m` iid draws. Require ``m >= 1``. This can be\n        broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that\n        case.\n\n    Returns\n    -------\n    E_min_x : float\n        Unbiased estimate of mean min of `m` draws from distribution on `x`.\n    \"\"\"\n    x = np.asarray(x)\n    E_min_x = -expected_max(-x, m)\n    return E_min_x\n"
  },
  {
    "path": "bayesmark/experiment.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Perform a study.\n\"\"\"\nimport json\nimport logging\nimport random as pyrandom\nimport uuid\nimport warnings\nfrom collections import OrderedDict\nfrom time import sleep, time\n\nimport numpy as np\nimport xarray as xr\n\nimport bayesmark.cmd_parse as cmd\nimport bayesmark.constants as cc\nimport bayesmark.random_search as rs\nfrom bayesmark.builtin_opt.config import CONFIG\nfrom bayesmark.cmd_parse import CmdArgs\nfrom bayesmark.constants import ARG_DELIM, ITER, OBJECTIVE, SUGGEST\nfrom bayesmark.data import METRICS_LOOKUP, get_problem_type\nfrom bayesmark.np_util import argmin_2d, linear_rescale, random_seed\nfrom bayesmark.serialize import XRSerializer\nfrom bayesmark.signatures import analyze_signature_pair, get_func_signature\nfrom bayesmark.sklearn_funcs import SklearnModel, SklearnSurrogate\nfrom bayesmark.space import JointSpace\nfrom bayesmark.util import chomp, str_join_safe\n\nlogger = logging.getLogger(__name__)\n\n# For now treat the objective names as global const. However, in the future these could vary by type of problem.\nOBJECTIVE_NAMES = SklearnModel.objective_names\n\n\ndef _build_test_problem(model_name, dataset, scorer, path):\n    \"\"\"Build the class with the class to use an objective. 
Sort of a factory.\n\n    Parameters\n    ----------\n    model_name : str\n        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.\n    dataset : str\n        Which data set the model is being tuned to, which must be either a) an element of\n        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.\n    scorer : str\n        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for\n        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.\n    path : str or None\n        Absolute path to folder containing custom data sets/pickle files with surrogate model.\n\n    Returns\n    -------\n    prob : :class:`.sklearn_funcs.TestFunction`\n        The test function to evaluate in experiments.\n    \"\"\"\n    if model_name.endswith(\"-surr\"):\n        # Requires IO to test these, so will add the pargma here. Maybe that points towards a possible design change.\n        model_name = chomp(model_name, \"-surr\")  # pragma: io\n        prob = SklearnSurrogate(model_name, dataset, scorer, path=path)  # pragma: io\n    else:\n        prob = SklearnModel(model_name, dataset, scorer, data_root=path)\n    return prob\n\n\ndef run_study(optimizer, test_problem, n_calls, n_suggestions, n_obj=1, callback=None):\n    \"\"\"Run a study for a single optimizer on a single test problem.\n\n    This function can be used for benchmarking on general stateless objectives (not just `sklearn`).\n\n    Parameters\n    ----------\n    optimizer : :class:`.abstract_optimizer.AbstractOptimizer`\n        Instance of one of the wrapper optimizers.\n    test_problem : :class:`.sklearn_funcs.TestFunction`\n        Instance of test function to attempt to minimize.\n    n_calls : int\n        How many iterations of minimization to run.\n    n_suggestions : int\n        How many parallel evaluation we run each iteration. 
Must be ``>= 1``.\n    n_obj : int\n        Number of different objectives measured, only objective 0 is seen by optimizer. Must be ``>= 1``.\n    callback : callable\n        Optional callback taking the current best function evaluation, and the number of iterations finished. Takes\n        array of shape `(n_obj,)`.\n\n    Returns\n    -------\n    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)\n        Value of objective for each evaluation.\n    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)\n        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,\n        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each\n        evaluation of the objective function, and the time to make an observe call.\n    suggest_log : list(list(dict(str, object)))\n        Log of the suggestions corresponding to the `function_evals`.\n    \"\"\"\n    assert n_suggestions >= 1, \"batch size must be at least 1\"\n    assert n_obj >= 1, \"Must be at least one objective\"\n\n    space_for_validate = JointSpace(test_problem.get_api_config())\n\n    if callback is not None:\n        # First do initial log at inf score, in case we don't even get to first eval before crash/job timeout\n        callback(np.full((n_obj,), np.inf, dtype=float), 0)\n\n    suggest_time = np.zeros(n_calls)\n    observe_time = np.zeros(n_calls)\n    eval_time = np.zeros((n_calls, n_suggestions))\n    function_evals = np.zeros((n_calls, n_suggestions, n_obj))\n    suggest_log = [None] * n_calls\n    for ii in range(n_calls):\n        tt = time()\n        try:\n            next_points = optimizer.suggest(n_suggestions)\n        except Exception as e:\n            logger.warning(\"Failure in optimizer suggest. 
Falling back to random search.\")\n            logger.exception(e, exc_info=True)\n            print(json.dumps({\"optimizer_suggest_exception\": {ITER: ii}}))\n            api_config = test_problem.get_api_config()\n            next_points = rs.suggest_dict([], [], api_config, n_suggestions=n_suggestions)\n        suggest_time[ii] = time() - tt\n\n        logger.info(\"suggestion time taken %f iter %d next_points %s\" % (suggest_time[ii], ii, str(next_points)))\n        assert len(next_points) == n_suggestions, \"invalid number of suggestions provided by the optimizer\"\n\n        # We could put this inside the TestProblem class, but ok here for now.\n        try:\n            space_for_validate.validate(next_points)  # Fails if suggestions outside allowed range\n        except Exception:\n            raise ValueError(\"Optimizer suggestion is out of range.\")\n\n        for jj, next_point in enumerate(next_points):\n            tt = time()\n            try:\n                f_current_eval = test_problem.evaluate(next_point)\n            except Exception as e:\n                logger.warning(\"Failure in function eval. 
Setting to inf.\")\n                logger.exception(e, exc_info=True)\n                f_current_eval = np.full((n_obj,), np.inf, dtype=float)\n            eval_time[ii, jj] = time() - tt\n            assert np.shape(f_current_eval) == (n_obj,)\n\n            suggest_log[ii] = next_points\n            function_evals[ii, jj, :] = f_current_eval\n            logger.info(\n                \"function_evaluation time %f value %f suggestion %s\"\n                % (eval_time[ii, jj], f_current_eval[0], str(next_point))\n            )\n\n        # Note: this could be inf in the event of a crash in f evaluation, the optimizer must be able to handle that.\n        # Only objective 0 is seen by optimizer.\n        eval_list = function_evals[ii, :, 0].tolist()\n\n        if callback is not None:\n            idx_ii, idx_jj = argmin_2d(function_evals[: ii + 1, :, 0])\n            callback(function_evals[idx_ii, idx_jj, :], ii + 1)\n\n        tt = time()\n        try:\n            optimizer.observe(next_points, eval_list)\n        except Exception as e:\n            logger.warning(\"Failure in optimizer observe. 
Ignoring these observations.\")\n            logger.exception(e, exc_info=True)\n            print(json.dumps({\"optimizer_observe_exception\": {ITER: ii}}))\n        observe_time[ii] = time() - tt\n\n        logger.info(\n            \"observation time %f, current best %f at iter %d\"\n            % (observe_time[ii], np.min(function_evals[: ii + 1, :, 0]), ii)\n        )\n\n    return function_evals, (suggest_time, eval_time, observe_time), suggest_log\n\n\ndef run_sklearn_study(\n    opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions, data_root=None, callback=None\n):\n    \"\"\"Run a study for a single optimizer on a single `sklearn` model/data set combination.\n\n    This routine is meant for benchmarking when tuning `sklearn` models, as opposed to the more general\n    :func:`.run_study`.\n\n    Parameters\n    ----------\n    opt_class : :class:`.abstract_optimizer.AbstractOptimizer`\n        Type of wrapper optimizer must be subclass of :class:`.abstract_optimizer.AbstractOptimizer`.\n    opt_kwargs : kwargs\n        `kwargs` to use when instantiating the wrapper class.\n    model_name : str\n        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.\n    dataset : str\n        Which data set the model is being tuned to, which must be either a) an element of\n        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.\n    scorer : str\n        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for\n        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.\n    n_calls : int\n        How many iterations of minimization to run.\n    n_suggestions : int\n        How many parallel evaluation we run each iteration. Must be ``>= 1``.\n    data_root : str\n        Absolute path to folder containing custom data sets. 
This may be ``None`` if no custom data sets are used.\n    callback : callable\n        Optional callback taking the current best function evaluation, and the number of iterations finished. Takes\n        array of shape `(n_obj,)`.\n\n    Returns\n    -------\n    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)\n        Value of objective for each evaluation.\n    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)\n        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,\n        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each\n        evaluation of the objective function, and the time to make an observe call.\n    suggest_log : list(list(dict(str, object)))\n        Log of the suggestions corresponding to the `function_evals`.\n    \"\"\"\n    # Setup test function\n    function_instance = _build_test_problem(model_name, dataset, scorer, data_root)\n\n    # Setup optimizer\n    api_config = function_instance.get_api_config()\n    optimizer_instance = opt_class(api_config, **opt_kwargs)\n\n    assert function_instance.objective_names == OBJECTIVE_NAMES\n    assert OBJECTIVE_NAMES[0] == cc.VISIBLE_TO_OPT\n    n_obj = len(OBJECTIVE_NAMES)\n\n    # Now actually do the experiment\n    function_evals, timing, suggest_log = run_study(\n        optimizer_instance, function_instance, n_calls, n_suggestions, n_obj=n_obj, callback=callback\n    )\n    return function_evals, timing, suggest_log\n\n\ndef get_objective_signature(model_name, dataset, scorer, data_root=None):\n    \"\"\"Get signature of an objective function specified by an sklearn model and dataset.\n\n    This routine specializes :func:`.signatures.get_func_signature` for the `sklearn` study case.\n\n    Parameters\n    ----------\n    model_name : str\n        Which sklearn model we are attempting to 
tune, must be an element of `constants.MODEL_NAMES`.\n    dataset : str\n        Which data set the model is being tuned to, which must be either a) an element of\n        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.\n    scorer : str\n        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for\n        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.\n    data_root : str\n        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.\n\n    Returns\n    -------\n    signature : list(str)\n        The signature of this test function.\n    \"\"\"\n    function_instance = _build_test_problem(model_name, dataset, scorer, data_root)\n    api_config = function_instance.get_api_config()\n    signature = get_func_signature(function_instance.evaluate, api_config)\n    return signature\n\n\ndef build_eval_ds(function_evals, objective_names):\n    \"\"\"Convert :class:`numpy:numpy.ndarray` with function evaluations to :class:`xarray:xarray.Dataset`.\n\n    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.\n\n    Parameters\n    ----------\n    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)\n        Value of objective for each evaluation.\n    objective_names : list(str) of shape (n_obj,)\n        The names of each objective.\n\n    Returns\n    -------\n    eval_ds : :class:`xarray:xarray.Dataset`\n        :class:`xarray:xarray.Dataset` containing one variable for each objective with the objective function\n        evaluations. 
It has dimensions ``(ITER, SUGGEST)``.\n    \"\"\"\n    n_call, n_suggest, n_obj = np.shape(function_evals)\n    assert len(objective_names) == n_obj\n    assert len(set(objective_names)) == n_obj, \"Objective names must be unique\"\n\n    coords = {ITER: range(n_call), SUGGEST: range(n_suggest), OBJECTIVE: list(objective_names)}\n    dims = (ITER, SUGGEST, OBJECTIVE)\n    da = xr.DataArray(data=function_evals, coords=coords, dims=dims)\n    eval_ds = da.to_dataset(dim=OBJECTIVE)\n    return eval_ds\n\n\ndef build_timing_ds(suggest_time, eval_time, observe_time):\n    \"\"\"Convert :class:`numpy:numpy.ndarray` with timing evaluations to :class:`xarray:xarray.Dataset`.\n\n    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.\n\n    Parameters\n    ----------\n    suggest_time : :class:`numpy:numpy.ndarray` of shape (n_calls,)\n        The time to make each (batch) suggestion.\n    eval_time : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions)\n        The time for each evaluation of the objective function.\n    observe_time : :class:`numpy:numpy.ndarray` of shape (n_calls,)\n        The time for each (batch) evaluation of the objective function, and the time to make an observe call.\n\n    Returns\n    -------\n    time_ds : :class:`xarray:xarray.Dataset`\n        Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``,\n        ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. 
The variable `EVAL_PHASE` has the function evaluation time\n        for each parallel suggestion.\n    \"\"\"\n    n_call, n_suggest = np.shape(eval_time)\n    assert np.shape(suggest_time) == (n_call,)\n    assert np.shape(observe_time) == (n_call,)\n\n    coords = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))])\n\n    data = OrderedDict()\n    data[cc.SUGGEST_PHASE] = ((ITER,), suggest_time)\n    data[cc.EVAL_PHASE] = ((ITER, SUGGEST), eval_time)\n    data[cc.OBS_PHASE] = ((ITER,), observe_time)\n\n    time_ds = xr.Dataset(data, coords=coords)\n    return time_ds\n\n\ndef build_suggest_ds(suggest_log):\n    \"\"\"Convert :class:`numpy:numpy.ndarray` with function evaluation inputs to :class:`xarray:xarray.Dataset`.\n\n    This function is a data cleanup routine after running an experiment, before serializing the data to end the study.\n\n    Parameters\n    ----------\n    suggest_log : list(list(dict(str, object)))\n        Log of the suggestions. It has shape `(n_call, n_suggest)`.\n\n    Returns\n    -------\n    suggest_ds : :class:`xarray:xarray.Dataset`\n        :class:`xarray:xarray.Dataset` containing one variable for each input with the objective function evaluations.\n        It has dimensions ``(ITER, SUGGEST)``.\n    \"\"\"\n    n_call, n_suggest = np.shape(suggest_log)\n    assert n_call * n_suggest > 0\n\n    # Setup the dims\n    ds_vars = sorted(suggest_log[0][0].keys())\n    coords = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))])\n\n    # There is prob a way to vectorize this more but good enough for now. 
Using np.full to infer dtype from 1st element\n    data = OrderedDict([(kk, ((ITER, SUGGEST), np.full((n_call, n_suggest), suggest_log[0][0][kk]))) for kk in ds_vars])\n    for ii in range(n_call):\n        for jj in range(n_suggest):\n            for kk in ds_vars:\n                data[kk][1][ii, jj] = suggest_log[ii][jj][kk]\n\n    suggest_ds = xr.Dataset(data, coords=coords)\n    return suggest_ds\n\n\ndef load_optimizer_kwargs(optimizer_name, opt_root):  # pragma: io\n    \"\"\"Load the kwarg options for this optimizer being tested.\n\n    This is part of the general experiment setup before a study.\n\n    Parameters\n    ----------\n    optimizer_name : str\n        Name of the optimizer being tested. This optimizer name must be present in optimizer config file.\n    opt_root : str\n        Absolute path to folder containing the config file.\n\n    Returns\n    -------\n    kwargs : dict(str, object)\n        The kwargs setting to pass into the optimizer wrapper constructor.\n    \"\"\"\n    if optimizer_name in CONFIG:\n        _, kwargs = CONFIG[optimizer_name]\n    else:\n        settings = cmd.load_optimizer_settings(opt_root)\n        assert optimizer_name in settings, \"optimizer %s not found in settings file %s\" % optimizer_name\n        _, kwargs = settings[optimizer_name]\n    return kwargs\n\n\ndef _setup_seeds(hex_str):  # pragma: main\n    \"\"\"This function should only be called from main. 
Be careful with this function as it manipulates the global random\n    streams.\n\n    This is part of the general experiment setup before a study.\n\n    If torch becomes used in any of our optimizers then this will need to come back, could also do TF seed init.\n    ```\n    torch.manual_seed(random_seed(master_stream))\n    if torch.cuda.is_available():\n        torch.cuda.manual_seed(random_seed(master_stream))\n    ```\n    \"\"\"\n    # Set all random seeds: avoid correlated streams ==> must use diff seeds.\n    # Could use UUID class, but more direct to just convert the hex to py int.\n    # pyrandom is better for master because it is not limited to 32-bit seeds.\n    master_stream = pyrandom.Random(int(hex_str, 16))\n    pyrandom.seed(random_seed(master_stream))\n    np.random.seed(random_seed(master_stream))\n\n\ndef experiment_main(opt_class, args=None):  # pragma: main\n    \"\"\"This is in effect the `main` routine for this experiment. However, it is called from the optimizer wrapper file\n    so the class can be passed in. 
The optimizers are assumed to be outside the package, so the optimizer class can't\n    be named from inside the main function without using hacky stuff like `eval`.\n    \"\"\"\n    if args is None:\n        description = \"Run a study with one benchmark function and an optimizer\"\n        args = cmd.parse_args(cmd.experiment_parser(description))\n    args[CmdArgs.opt_rev] = opt_class.get_version()\n\n    run_uuid = uuid.UUID(args[CmdArgs.uuid])\n\n    logging.captureWarnings(True)\n\n    # Setup logging to both a file and stdout (if verbose is set to True)\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    logfile = XRSerializer.logging_path(args[CmdArgs.db_root], args[CmdArgs.db], run_uuid)\n    logger_file_handler = logging.FileHandler(logfile, mode=\"w\")\n    logger.addHandler(logger_file_handler)\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    warnings_logger = logging.getLogger(\"py.warnings\")\n    warnings_logger.addHandler(logger_file_handler)\n    if args[CmdArgs.verbose]:\n        warnings_logger.addHandler(logging.StreamHandler())\n\n    logger.info(\"running: %s\" % str(cmd.serializable_dict(args)))\n    logger.info(\"cmd: %s\" % cmd.cmd_str())\n\n    assert (\n        args[CmdArgs.metric] in METRICS_LOOKUP[get_problem_type(args[CmdArgs.data])]\n    ), \"reg/clf metrics can only be used on compatible dataset\"\n\n    # Setup random streams for computing the signature, must use same seed\n    # across all runs to ensure signature is consistent. 
This seed is random:\n    _setup_seeds(\"7e9f2cabb0dd4f44bc10cf18e440b427\")  # pragma: allowlist secret\n    signature = get_objective_signature(\n        args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric], data_root=args[CmdArgs.data_root]\n    )\n    logger.info(\"computed signature: %s\" % str(signature))\n\n    opt_kwargs = load_optimizer_kwargs(args[CmdArgs.optimizer], args[CmdArgs.optimizer_root])\n\n    # Setup the call back for intermediate logging\n    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]):\n        warnings.warn(\"Baselines not found. Will not log intermediate scores.\")\n        callback = None\n    else:\n        test_case_str = SklearnModel.test_case_str(args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric])\n        optimizer_str = str_join_safe(ARG_DELIM, (args[CmdArgs.optimizer], args[CmdArgs.opt_rev], args[CmdArgs.rev]))\n\n        baseline_ds, baselines_meta = XRSerializer.load_derived(\n            args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE\n        )\n\n        # Check the objective function signatures match in the baseline file\n        sig_errs, _ = analyze_signature_pair({test_case_str: signature[1]}, baselines_meta[\"signature\"])\n        logger.info(\"Signature errors:\\n%s\" % sig_errs.to_string())\n        print(json.dumps({\"exp sig errors\": sig_errs.T.to_dict()}))\n\n        def log_mean_score_json(evals, iters):\n            assert evals.shape == (len(OBJECTIVE_NAMES),)\n            assert not np.any(np.isnan(evals))\n\n            log_msg = {\n                cc.TEST_CASE: test_case_str,\n                cc.METHOD: optimizer_str,\n                cc.TRIAL: args[CmdArgs.uuid],\n                cc.ITER: iters,\n            }\n\n            for idx, obj in enumerate(OBJECTIVE_NAMES):\n                assert OBJECTIVE_NAMES[idx] == obj\n\n                # Extract relevant rescaling info\n                slice_ = {cc.TEST_CASE: 
test_case_str, OBJECTIVE: obj}\n                best_opt = baseline_ds[cc.PERF_BEST].sel(slice_, drop=True).values.item()\n                base_clip_val = baseline_ds[cc.PERF_CLIP].sel(slice_, drop=True).values.item()\n\n                # Perform the same rescaling as found in experiment_analysis.compute_aggregates()\n                score = linear_rescale(evals[idx], best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)\n                # Also, clip the score from below at -1 to limit max influence of single run on final average\n                score = np.clip(score, -1.0, 1.0)\n                score = score.item()  # Make easiest for logging in JSON\n                assert isinstance(score, float)\n\n                # Note: This is not the raw score but the rescaled one!\n                log_msg[obj] = score\n            log_msg = json.dumps(log_msg)\n            print(log_msg, flush=True)\n            # One second safety delay to protect against subprocess stdout getting lost\n            sleep(1)\n\n        callback = log_mean_score_json\n\n    # Now set the seeds for the actual experiment\n    _setup_seeds(args[CmdArgs.uuid])\n\n    # Now do the experiment\n    logger.info(\n        \"starting sklearn study %s %s %s %s %d %d\"\n        % (\n            args[CmdArgs.optimizer],\n            args[CmdArgs.classifier],\n            args[CmdArgs.data],\n            args[CmdArgs.metric],\n            args[CmdArgs.n_calls],\n            args[CmdArgs.n_suggest],\n        )\n    )\n    logger.info(\"with data root: %s\" % args[CmdArgs.data_root])\n    function_evals, timing, suggest_log = run_sklearn_study(\n        opt_class,\n        opt_kwargs,\n        args[CmdArgs.classifier],\n        args[CmdArgs.data],\n        args[CmdArgs.metric],\n        args[CmdArgs.n_calls],\n        args[CmdArgs.n_suggest],\n        data_root=args[CmdArgs.data_root],\n        callback=callback,\n    )\n\n    # Curate results into clean dataframes\n    eval_ds = 
build_eval_ds(function_evals, OBJECTIVE_NAMES)\n    time_ds = build_timing_ds(*timing)\n    suggest_ds = build_suggest_ds(suggest_log)\n\n    # setup meta:\n    meta = {\"args\": cmd.serializable_dict(args), \"signature\": signature}\n    logger.info(\"saving meta data: %s\" % str(meta))\n\n    # Now the final IO to export the results\n    logger.info(\"saving results\")\n    XRSerializer.save(eval_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL, uuid_=run_uuid)\n\n    logger.info(\"saving timing\")\n    XRSerializer.save(time_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.TIME, uuid_=run_uuid)\n\n    logger.info(\"saving suggest log\")\n    XRSerializer.save(suggest_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.SUGGEST_LOG, uuid_=run_uuid)\n\n    logger.info(\"done\")\n\n\ndef _get_opt_class(opt_name):\n    \"\"\"Load the relevant wrapper class based on this optimizer name.\n\n    This is inherently a bit ugly, but is only called at the main() level before the inner workings get going. There\n    are a few ways to do this with some pro and con:\n    1) The way done here: based on the filename, load that module via conditional imports and if-else. cons:\n        - uses conditional imports\n        - must manually repeat yourself in the if-else, but these are checked in unit testing\n    2) Import everything and then pick the right optimizer based on a dict of name_str -> class. cons:\n        - loads every dependency no matter which is used so could be slow\n        - also a stupid dependency might change global state in a way that corrupts experiments\n    3) Use the wrapper file as the entry point and add that to setup.py. cons:\n        - Will clutter the CLI namespace with one command for each wrapper\n    4) Use importlib to import the specified file. cons:\n        - Makes assumptions about relative path structure. 
For pip-installed packages, probably safer to let python\n        find the file via import.\n    This option (1) seems least objectionable. However, this function could easily be switched to use importlib without\n    any changes elsewhere.\n    \"\"\"\n    wrapper_file, _ = CONFIG[opt_name]\n\n    if wrapper_file == \"hyperopt_optimizer.py\":\n        import bayesmark.builtin_opt.hyperopt_optimizer as opt\n    elif wrapper_file == \"nevergrad_optimizer.py\":\n        import bayesmark.builtin_opt.nevergrad_optimizer as opt\n    elif wrapper_file == \"opentuner_optimizer.py\":\n        import bayesmark.builtin_opt.opentuner_optimizer as opt\n    elif wrapper_file == \"pysot_optimizer.py\":\n        import bayesmark.builtin_opt.pysot_optimizer as opt\n    elif wrapper_file == \"random_optimizer.py\":\n        import bayesmark.builtin_opt.random_optimizer as opt\n    elif wrapper_file == \"scikit_optimizer.py\":\n        import bayesmark.builtin_opt.scikit_optimizer as opt\n    else:\n        assert False, \"CONFIG for built in optimizers has added a new optimizer, but not updated this function.\"\n\n    opt_class = opt.opt_wrapper\n    return opt_class\n\n\ndef main():  # pragma: main\n    \"\"\"This is where experiments happen. Usually called by the experiment launcher.\"\"\"\n    description = \"Run a study with one benchmark function and an optimizer\"\n    args = cmd.parse_args(cmd.experiment_parser(description))\n\n    opt_class = _get_opt_class(args[CmdArgs.optimizer])\n    experiment_main(opt_class, args=args)\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: main\n"
  },
  {
    "path": "bayesmark/experiment_aggregate.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Aggregate the results of many studies to prepare analysis.\n\"\"\"\nimport json\nimport logging\nfrom collections import Counter\n\nimport numpy as np\nimport xarray as xr\n\nimport bayesmark.constants as cc\nimport bayesmark.xr_util as xru\nfrom bayesmark.cmd_parse import CmdArgs, agg_parser, parse_args, serializable_dict, unserializable_dict\nfrom bayesmark.constants import ARG_DELIM, EVAL_RESULTS, ITER, METHOD, SUGGEST, TEST_CASE, TIME_RESULTS, TRIAL\nfrom bayesmark.serialize import XRSerializer\nfrom bayesmark.signatures import analyze_signatures\nfrom bayesmark.sklearn_funcs import SklearnModel\nfrom bayesmark.util import str_join_safe\n\nlogger = logging.getLogger(__name__)\n\n\ndef validate_time(all_time):\n    \"\"\"Validate the aggregated time data set.\"\"\"\n    assert isinstance(all_time, xr.Dataset)\n    assert all_time[cc.SUGGEST_PHASE].dims == (ITER,)\n    assert all_time[cc.EVAL_PHASE].dims == (ITER, SUGGEST)\n    assert all_time[cc.OBS_PHASE].dims == (ITER,)\n    assert xru.is_simple_coords(all_time.coords, min_side=1)\n\n\ndef validate_perf(perf_da):\n    \"\"\"Validate the input eval data arrays.\"\"\"\n    assert isinstance(perf_da, xr.Dataset)\n    assert perf_da.dims == (ITER, SUGGEST)\n    assert xru.is_simple_coords(perf_da.coords)\n    assert not np.any(np.isnan(perf_da.values))\n\n\ndef validate_agg_perf(perf_da, 
min_trial=1):\n    \"\"\"Validate the aggregated eval data set.\"\"\"\n    assert isinstance(perf_da, xr.DataArray)\n    assert perf_da.dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)\n    assert xru.is_simple_coords(perf_da.coords, dims=(ITER, SUGGEST, TRIAL))\n    assert not np.any(np.isnan(perf_da.values))\n    assert perf_da.sizes[TRIAL] >= min_trial\n\n\ndef summarize_time(all_time):\n    \"\"\"Transform a single timing dataset from an experiment into a form better for aggregation.\n\n    Parameters\n    ----------\n    all_time : :class:`xarray:xarray.Dataset`\n        Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``,\n        ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. The variable `EVAL_PHASE` has the function evaluation time\n        for each parallel suggestion.\n\n    Returns\n    -------\n    time_summary : :class:`xarray:xarray.Dataset`\n        Dataset with variables ``(SUGGEST_PHASE, OBS_PHASE, EVAL_PHASE_MAX, EVAL_PHASE_SUM)`` which all have dimensions\n        ``(ITER,)``. The maximum `EVAL_PHASE_MAX` is relevant for wall clock time, while `EVAL_PHASE_SUM` is relevant\n        for CPU time.\n    \"\"\"\n    validate_time(all_time)\n\n    time_summary = xr.Dataset(coords=all_time.coords)\n\n    time_summary[cc.SUGGEST_PHASE] = all_time[cc.SUGGEST_PHASE]\n    time_summary[cc.OBS_PHASE] = all_time[cc.OBS_PHASE]\n    time_summary[cc.EVAL_PHASE_MAX] = all_time[cc.EVAL_PHASE].max(dim=SUGGEST)\n    time_summary[cc.EVAL_PHASE_SUM] = all_time[cc.EVAL_PHASE].sum(dim=SUGGEST)\n    return time_summary\n\n\ndef concat_experiments(all_experiments, ravel=False):\n    \"\"\"Aggregate the Datasets from a series of experiments into combined Dataset.\n\n    Parameters\n    ----------\n    all_experiments : typing.Iterable\n        Iterable (possible from a generator) with the Datasets from each experiment. Each item in `all_experiments` is\n        a pair containing ``(meta_data, data)``. 
See `load_experiments` for details on these variables.\n    ravel : bool\n        If true, ravel all studies to store batch suggestions as if they were serial.\n\n    Returns\n    -------\n    all_perf : :class:`xarray:xarray.Dataset`\n        DataArray containing all of the `perf_da` from the experiments. The meta-data from the experiments are included\n        as extra dimensions. `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the\n        `uuid` to a trial, there must be an equal number of repetitions in the experiments for each `TEST_CASE`,\n        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`. If `ravel`\n        is true, then the `SUGGEST` is singleton.\n    all_time : :class:`xarray:xarray.Dataset`\n        Dataset containing all of the `time_ds` from the experiments. The new dimensions are\n        ``(ITER, TEST_CASE, METHOD, TRIAL)``. It has the same variables as `time_ds`.\n    all_suggest : :class:`xarray:xarray.Dataset`\n        DataArray containing all of the `suggest_ds` from the experiments. It has dimensions\n        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``.\n    all_sigs : dict(str, list(list(float)))\n        Aggregate of all experiment signatures.\n    \"\"\"\n    all_perf = {}\n    all_time = {}\n    all_suggest = {}\n    all_sigs = {}\n    trial_counter = Counter()\n    for (test_case, optimizer, uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:\n        if ravel:\n            raise NotImplementedError(\"ravel is deprecated. 
Just reshape in analysis steps instead.\")\n\n        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])\n        trial_counter[(test_case, optimizer)] += 1\n\n        # Process perf data\n        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)\n        all_perf[case_key] = perf_ds\n\n        # Process time data\n        all_time[case_key] = summarize_time(time_ds)\n\n        # Process suggestion data\n        all_suggest_curr = all_suggest.setdefault(test_case, {})\n        all_suggest_curr[case_key] = suggest_ds\n\n        # Handle the signatures\n        all_sigs.setdefault(test_case, []).append(sig)\n    assert min(trial_counter.values()) == max(trial_counter.values()), \"Uneven number of trials per test case\"\n\n    # Now need to concat dict of datasets into single dataset\n    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))\n    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)\n    assert not any(\n        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf\n    ), \"Missing combinations of method and test case\"\n\n    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))\n    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)\n    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)\n    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))\n\n    for test_case in all_suggest:\n        all_suggest[test_case] = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))\n        assert all(\n            all_suggest[test_case][kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)\n            for kk in all_suggest[test_case]\n        )\n        assert not any(np.any(np.isnan(all_suggest[test_case][kk].values)) for kk in all_suggest[test_case])\n        assert xru.coord_compat((all_perf, all_suggest[test_case]), (ITER, METHOD, TRIAL))\n        
assert all_suggest[test_case].coords[TEST_CASE].shape == (1,), \"test case should be singleton\"\n\n    return all_perf, all_time, all_suggest, all_sigs\n\n\ndef load_experiments(uuid_list, db_root, dbid):  # pragma: io\n    \"\"\"Generator to load the results of the experiments.\n\n    Parameters\n    ----------\n    uuid_list : list(uuid.UUID)\n        List of UUIDs corresponding to experiments to load.\n    db_root : str\n        Root location for data store as requested by the serializer used.\n    dbid : str\n        Name of the data store as requested by the serializer used.\n\n    Yields\n    ------\n    meta_data : (str, str, str)\n        The `meta_data` contains a `tuple` of `str` with ``test_case, optimizer, uuid``.\n    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset` list(float))\n        The `data` contains a tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is a\n        :class:`xarray:xarray.Dataset` containing the evaluation results with dimensions ``(ITER, SUGGEST)``, each\n        variable is an objective. The `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of\n        the form accepted by `summarize_time`. The coordinates must be compatible with `perf_ds`. The suggest_ds is a\n        :class:`xarray:xarray.Dataset` containing the inputs to the function evaluations. Each variable is a function\n        input. 
Finally, `sig` contains the `test_case` signature and must be `list(float)`.\n    \"\"\"\n    uuids_seen = set()\n    for uuid_ in uuid_list:\n        logger.info(uuid_.hex)\n\n        # Load perf and timing data\n        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)\n        time_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)\n        assert meta == meta_t, \"meta data should between time and eval files\"\n        suggest_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)\n        assert meta == meta_t, \"meta data should between suggest and eval files\"\n\n        # Get signature to pass out as well\n        _, sig = meta[\"signature\"]\n        logger.info(meta)\n        logger.info(sig)\n\n        # Build the new indices for combined data, this could be put in function for easier testing\n        eval_args = unserializable_dict(meta[\"args\"])  # Unpack meta-data\n        test_case = SklearnModel.test_case_str(\n            eval_args[CmdArgs.classifier], eval_args[CmdArgs.data], eval_args[CmdArgs.metric]\n        )\n        optimizer = str_join_safe(\n            ARG_DELIM, (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])\n        )\n        args_uuid = eval_args[CmdArgs.uuid]\n\n        # Check UUID sanity\n        assert isinstance(args_uuid, str)\n        assert args_uuid == uuid_.hex, \"UUID meta-data does not match filename\"\n        assert args_uuid not in uuids_seen, \"uuids being reused between studies\"\n        uuids_seen.add(args_uuid)\n\n        # Return key -> data so this generator can be iterated over in dict like manner\n        meta_data = (test_case, optimizer, args_uuid)\n        data = (perf_ds, time_ds, suggest_ds, sig)\n        yield meta_data, data\n\n\ndef main():\n    \"\"\"See README for instructions on calling aggregate.\n    \"\"\"\n    description = \"Aggregate study results across functions and 
optimizers\"\n    args = parse_args(agg_parser(description))\n\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    # Get list of UUIDs\n    uuid_list = XRSerializer.get_uuids(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL)\n    uuid_list_ = XRSerializer.get_uuids(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.TIME)\n    assert uuid_list == uuid_list_, \"UUID list does not match between time and eval results\"\n    uuid_list_ = XRSerializer.get_uuids(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.SUGGEST_LOG)\n    assert uuid_list == uuid_list_, \"UUID list does not match between suggest log and eval results\"\n\n    # Get iterator of all experiment data dumps, load in and process, and concat\n    data_G = load_experiments(uuid_list, args[CmdArgs.db_root], args[CmdArgs.db])\n    all_perf, all_time, all_suggest, all_sigs = concat_experiments(data_G, ravel=args[CmdArgs.ravel])\n\n    # Check that the concat signatures are coherent\n    sig_errs, signatures_median = analyze_signatures(all_sigs)\n    logger.info(\"Signature errors:\\n%s\" % sig_errs.to_string())\n    print(json.dumps({\"exp-agg sig errors\": sig_errs.T.to_dict()}))\n\n    # Dump and save it all out\n    logger.info(\"saving\")\n    meta = {\"args\": serializable_dict(args), \"signature\": signatures_median}\n    XRSerializer.save_derived(all_perf, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=EVAL_RESULTS)\n    XRSerializer.save_derived(all_time, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=TIME_RESULTS)\n    for test_case, ds in all_suggest.items():\n        XRSerializer.save_derived(ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=test_case)\n\n    logger.info(\"done\")\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: main\n"
  },
  {
    "path": "bayesmark/experiment_analysis.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Perform analysis to compare different optimizers across problems.\n\"\"\"\nimport json\nimport logging\nimport warnings\nfrom collections import OrderedDict\n\nimport numpy as np\nimport xarray as xr\n\nimport bayesmark.constants as cc\nimport bayesmark.quantiles as qt\nimport bayesmark.xr_util as xru\nfrom bayesmark.cmd_parse import CmdArgs, general_parser, parse_args, serializable_dict\nfrom bayesmark.constants import (\n    ITER,\n    LB_MEAN,\n    LB_MED,\n    LB_NORMED_MEAN,\n    METHOD,\n    NORMED_MEAN,\n    NORMED_MED,\n    OBJECTIVE,\n    PERF_BEST,\n    PERF_CLIP,\n    PERF_MEAN,\n    PERF_MED,\n    SUGGEST,\n    TEST_CASE,\n    TRIAL,\n    UB_MEAN,\n    UB_MED,\n    UB_NORMED_MEAN,\n)\nfrom bayesmark.experiment_aggregate import validate_agg_perf\nfrom bayesmark.experiment_baseline import do_baseline\nfrom bayesmark.np_util import cummin, linear_rescale\nfrom bayesmark.serialize import XRSerializer\nfrom bayesmark.signatures import analyze_signature_pair\nfrom bayesmark.stats import t_EB\n\n# Mathematical settings\nEVAL_Q = 0.5  # Evaluate based on median loss across n_trials\nALPHA = 0.05  # ==> 95% CIs\n\nlogger = logging.getLogger(__name__)\n\n\ndef get_perf_array(evals, evals_visible):\n    \"\"\"Get the actual (e.g., generalization loss) over iterations.\n\n    Parameters\n    ----------\n    evals : :class:`numpy:numpy.ndarray` of 
shape (n_iter, n_batch, n_trials)\n        The actual loss (e.g., generalization) for a given experiment.\n    evals_visible : :class:`numpy:numpy.ndarray` of shape (n_iter, n_batch, n_trials)\n        The observable loss (e.g., validation) for a given experiment.\n\n    Returns\n    -------\n    perf_array : :class:`numpy:numpy.ndarray` of shape (n_iter, n_trials)\n        The best performance so far at iteration i from `evals`. Where the best has been selected according to\n        `evals_visible`.\n    \"\"\"\n    n_iter, _, n_trials = evals.shape\n    assert evals.size > 0, \"perf array not supported for empty arrays\"\n    assert evals_visible.shape == evals.shape\n    assert not np.any(np.isnan(evals))\n    assert not np.any(np.isnan(evals_visible))\n\n    idx = np.argmin(evals_visible, axis=1)\n    perf_array = np.take_along_axis(evals, idx[:, None, :], axis=1).squeeze(axis=1)\n    assert perf_array.shape == (n_iter, n_trials)\n\n    visible_perf_array = np.min(evals_visible, axis=1)\n    assert visible_perf_array.shape == (n_iter, n_trials)\n\n    # Get the minimum from the visible loss\n    perf_array = cummin(perf_array, visible_perf_array)\n    return perf_array\n\n\ndef compute_aggregates(perf_da, baseline_ds, visible_perf_da=None):\n    \"\"\"Aggregate function evaluations in the experiments to get performance summaries of each method.\n\n    Parameters\n    ----------\n    perf_da : :class:`xarray:xarray.DataArray`\n        Aggregate experimental results with each function evaluation in the experiments according to true loss\n        (e.g., generalization). `perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` as is assumed\n        to have no nan values.\n    baseline_ds : :class:`xarray:xarray.Dataset`\n        Dataset with baseline performance. 
It was variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with\n        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.\n        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.\n        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upperbound to clip poor performance when using the mean.\n        `PERF_BEST` is an estimate on the global minimum.\n    visible_perf_da : :class:`xarray:xarray.DataArray`\n        Aggregate experimental results with each function evaluation in the experiments according to visible loss\n        (e.g., validation). `visible_perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` as is\n        assumed to have no nan values. If `None`, we set ``visible_perf_da = perf_da``.\n\n    Returns\n    -------\n    agg_result : :class:`xarray:xarray.Dataset`\n        Dataset with summary of performance for each method and test case combination. Contains variables:\n        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``\n        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with `LB_MED`\n        and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` so we expect the optimal performance is 0,\n        and random search gives 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`, `NORMED_MEAN` are for\n        mean performance.\n    summary : :class:`xarray:xarray.Dataset`\n        Dataset with overall summary of performance of each method. 
Contains variables\n        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN)``\n        each with dimensions ``(ITER, METHOD)``.\n    \"\"\"\n    validate_agg_perf(perf_da, min_trial=1)\n\n    assert isinstance(baseline_ds, xr.Dataset)\n    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)\n    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)\n    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)\n    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)\n    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))\n    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)\n\n    # Now actually get the aggregate performance numbers per test case\n    agg_result = xru.ds_like(\n        perf_da,\n        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),\n        (ITER, METHOD, TEST_CASE),\n    )\n    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, [\"ref\"], (ITER, TEST_CASE)))\n    # Using values here since just clearer to get raw items than xr object for func_name\n    for func_name in perf_da.coords[TEST_CASE].values:\n        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values\n        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values\n        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values\n        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values\n\n        assert np.all(np.diff(rand_perf_med) <= 0), \"Baseline should be decreasing with iteration\"\n        assert np.all(np.diff(rand_perf_mean) <= 0), \"Baseline should be decreasing with iteration\"\n        assert np.all(rand_perf_med > best_opt)\n        assert np.all(rand_perf_mean > best_opt)\n        assert np.all(rand_perf_mean <= base_clip_val)\n\n        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(\n            rand_perf_mean, 
best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False\n        )\n        for method_name in perf_da.coords[METHOD].values:\n            # Take the minimum over all suggestion at given iter + sanity check perf_da\n            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)\n            assert curr_da.dims == (ITER, SUGGEST, TRIAL)\n\n            if visible_perf_da is None:\n                perf_array = get_perf_array(curr_da.values, curr_da.values)\n\n                curr_da_ = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)\n                assert curr_da_.dims == (ITER, TRIAL)\n                perf_array_ = np.minimum.accumulate(curr_da_.values, axis=0)\n                assert np.allclose(perf_array, perf_array_)\n            else:\n                curr_visible_da = visible_perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)\n                assert curr_visible_da.dims == (ITER, SUGGEST, TRIAL)\n                perf_array = get_perf_array(curr_da.values, curr_visible_da.values)\n\n            # Compute median perf and CI on it\n            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)\n            assert med_perf.shape == rand_perf_med.shape\n            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf\n            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB\n            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB\n\n            # Now store normed version, which is better for aggregation\n            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)\n            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed\n\n            # Store normed mean version\n            normed = linear_rescale(perf_array, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)\n            # Also, clip the score 
from below at -1 to limit max influence of single run on final average\n            normed = np.clip(normed, -1.0, 1.0)\n            normed = np.mean(normed, axis=1)\n            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed\n\n            # Compute mean perf and CI on it\n            perf_array = np.minimum(base_clip_val, perf_array)\n            mean_perf = np.mean(perf_array, axis=1)\n            assert mean_perf.shape == rand_perf_mean.shape\n            EB = t_EB(perf_array, alpha=ALPHA, axis=1)\n            assert EB.shape == rand_perf_mean.shape\n            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf\n            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB\n            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB\n    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)\n\n    # Compute summary score over all test cases, summarize performance of each method\n    summary = xru.ds_like(\n        perf_da,\n        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),\n        (ITER, METHOD),\n    )\n    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(\n        qt.quantile_and_CI,\n        agg_result[NORMED_MED],\n        input_core_dims=[[TEST_CASE]],\n        kwargs={\"q\": EVAL_Q, \"alpha\": ALPHA},\n        output_core_dims=[[], [], []],\n    )\n\n    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)\n    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])\n    summary[LB_MEAN] = summary[PERF_MEAN] - EB\n    summary[UB_MEAN] = summary[PERF_MEAN] + EB\n\n    normalizer = baseline_mean_da.mean(dim=TEST_CASE)\n    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer\n    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer\n    summary[UB_NORMED_MEAN] = 
summary[UB_MEAN] / normalizer\n\n    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)\n    return agg_result, summary\n\n\ndef main():\n    \"\"\"See README for instructions on calling analysis.\n    \"\"\"\n    description = \"Analyze results from aggregated studies\"\n    args = parse_args(general_parser(description))\n\n    # Metric used on leaderboard\n    leaderboard_metric = cc.VISIBLE_TO_OPT\n\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    # Load in the eval data and sanity check\n    perf_ds, meta = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL_RESULTS)\n    logger.info(\"Meta data from source file: %s\" % str(meta[\"args\"]))\n\n    # Check if there is baselines file, other make one\n    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]):\n        warnings.warn(\"Baselines not found. 
Need to construct baseline.\")\n        do_baseline(args)\n\n    # Load in baseline scores data and sanity check (including compatibility with eval data)\n    baseline_ds, meta_ref = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)\n    logger.info(\"baseline data from source ref file: %s\" % str(meta_ref[\"args\"]))\n\n    # Check test case signatures match between eval data and baseline data\n    sig_errs, signatures = analyze_signature_pair(meta[\"signature\"], meta_ref[\"signature\"])\n    logger.info(\"Signature errors:\\n%s\" % sig_errs.to_string())\n    print(json.dumps({\"exp-anal sig errors\": sig_errs.T.to_dict()}))\n\n    # Subset baseline to only the test cases run in the experiments\n    test_cases_run = perf_ds.coords[TEST_CASE].values.tolist()\n    assert set(test_cases_run) <= set(\n        baseline_ds.coords[TEST_CASE].values.tolist()\n    ), \"Data set contains test cases not found in baseline.\"\n    baseline_ds = baseline_ds.sel({TEST_CASE: test_cases_run})\n\n    # Also subset to allow shorter runs\n    iters_run = perf_ds.coords[ITER].values.tolist()\n    assert set(iters_run) <= set(\n        baseline_ds.coords[ITER].values.tolist()\n    ), \"Data set not same batch size or too many iters compared to baseline.\"\n    baseline_ds = baseline_ds.sel({ITER: iters_run})\n\n    # Do the actual computation\n    perf_visible = perf_ds[cc.VISIBLE_TO_OPT]\n    agg_result = OrderedDict()\n    summary = OrderedDict()\n    for metric_for_scoring in sorted(perf_ds):\n        perf_da = perf_ds[metric_for_scoring]\n        baseline_ds_ = baseline_ds.sel({OBJECTIVE: metric_for_scoring}, drop=True)\n        agg_result[(metric_for_scoring,)], summary[(metric_for_scoring,)] = compute_aggregates(\n            perf_da, baseline_ds_, perf_visible\n        )\n    agg_result = xru.ds_concat(agg_result, dims=(cc.OBJECTIVE,))\n    summary = xru.ds_concat(summary, dims=(cc.OBJECTIVE,))\n\n    for metric_for_scoring in 
sorted(perf_ds):\n        # Print summary by problem\n        # Recall that:\n        # ... summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)\n        # ... summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer\n        # Where normalizer is constant across all problems, optimizers\n        print(\"Scores by problem (JSON):\\n\")\n        agg_df = agg_result[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}].to_pandas().T\n        print(json.dumps({metric_for_scoring: agg_df.to_dict()}))\n        print(\"\\n\")\n\n        final_score = summary[PERF_MED].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]\n        logger.info(\"median score @ %d:\\n%s\" % (summary.sizes[ITER], xru.da_to_string(final_score)))\n        final_score = summary[PERF_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]\n        logger.info(\"mean score @ %d:\\n%s\" % (summary.sizes[ITER], xru.da_to_string(final_score)))\n\n        print(\"Final scores (JSON):\\n\")\n        print(json.dumps({metric_for_scoring: final_score.to_series().to_dict()}))\n        print(\"\\n\")\n\n        final_score = summary[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]\n        logger.info(\"normed mean score @ %d:\\n%s\" % (summary.sizes[ITER], xru.da_to_string(final_score)))\n\n    # Now saving results\n    meta = {\"args\": serializable_dict(args), \"signature\": signatures}\n    XRSerializer.save_derived(agg_result, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.PERF_RESULTS)\n\n    XRSerializer.save_derived(summary, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.MEAN_SCORE)\n\n    final_msg = xru.da_to_string(\n        100 * (1.0 - summary[PERF_MEAN].sel({cc.OBJECTIVE: leaderboard_metric}, drop=True)[{ITER: -1}])\n    )\n    logger.info(\"-\" * 20)\n    logger.info(\"Final score `100 x (1-loss)` for leaderboard:\\n%s\" % final_msg)\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: 
main\n"
  },
  {
    "path": "bayesmark/experiment_baseline.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Build performance baselines from aggregate results to prepare analysis.\n\"\"\"\nimport logging\nimport warnings\nfrom collections import OrderedDict\n\nimport numpy as np\n\nimport bayesmark.constants as cc\nimport bayesmark.expected_max as em\nimport bayesmark.quantiles as qt\nfrom bayesmark.cmd_parse import CmdArgs, general_parser, parse_args\nfrom bayesmark.constants import ARG_DELIM, ITER, METHOD, PERF_BEST, PERF_CLIP, PERF_MEAN, PERF_MED, SUGGEST, TEST_CASE\nfrom bayesmark.experiment_aggregate import validate_agg_perf\nfrom bayesmark.serialize import XRSerializer\nfrom bayesmark.util import str_join_safe\nfrom bayesmark.xr_util import ds_concat, ds_like_mixed\n\n# Mathematical settings\n# We could move these to constants to eliminate repetition but we will probably phase out anyway\nEVAL_Q = 0.5  # Evaluate based on median loss across n_trials\nALPHA = 0.05  # ==> 95% CIs\nMIN_POS = np.nextafter(0, 1)\nPAD_FACTOR = 10000\n\nlogger = logging.getLogger(__name__)\n\n\ndef validate(baseline_ds):\n    \"\"\"Perform same tracks as will happen in analysis.\"\"\"\n    for func_name in baseline_ds.coords[TEST_CASE].values:\n        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values\n        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values\n        best_opt = 
baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values\n        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values\n\n        assert np.all(np.diff(rand_perf_med) <= 0), \"Baseline should be decreasing with iteration\"\n        assert np.all(np.diff(rand_perf_mean) <= 0), \"Baseline should be decreasing with iteration\"\n        assert np.all(rand_perf_med > best_opt)\n        assert np.all(rand_perf_mean > best_opt)\n        assert np.all(rand_perf_mean <= base_clip_val)\n\n\ndef compute_baseline(perf_da):\n    \"\"\"Compute a performance baseline of base and best performance from the aggregate experimental results.\n\n    Parameters\n    ----------\n    perf_da : :class:`xarray:xarray.DataArray`\n        Aggregate experimental results with each function evaluation in the experiments. `all_perf` has dimensions\n        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` as is assumed to have no nan values.\n\n    Returns\n    -------\n    baseline_ds : :class:`xarray:xarray.Dataset`\n        Dataset with baseline performance. It was variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with\n        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.\n        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.\n        Likewise, `PERF_MEAN` is for means. 
`PERF_CLIP` is an upper bound to clip poor performance when using the mean.\n        `PERF_BEST` is an estimate on the global minimum.\n    \"\"\"\n    validate_agg_perf(perf_da)\n\n    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, \"\"))\n    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]\n    assert len(ref_random) > 0, \"Did not find any random search in methods.\"\n\n    # How many points we will have after each batch\n    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))\n\n    # Now iterate over problems and get baseline performance\n    baseline_ds = ds_like_mixed(\n        perf_da,\n        [\n            (PERF_MED, [ITER, TEST_CASE]),\n            (PERF_MEAN, [ITER, TEST_CASE]),\n            (PERF_CLIP, [TEST_CASE]),\n            (PERF_BEST, [TEST_CASE]),\n        ],\n        (ITER, TEST_CASE),\n    )\n    for func_name in perf_da.coords[TEST_CASE].values:\n        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)\n        assert random_evals.size > 0\n\n        # We will likely change this to a min mean (instead of median) using a different util in near future:\n        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))\n        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)\n        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf\n\n        # Decide on a level to clip when computing the mean\n        base_clip_val = qt.quantile(random_evals, EVAL_Q)\n        assert np.isfinite(base_clip_val), \"Median random search performance is not even finite.\"\n        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])\n        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val\n\n        # Estimate the global min via best of any method\n        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, 
drop=True).values)\n        if np.any(rand_perf <= best_opt):\n            warnings.warn(\n                \"Random search is also the best search on %s, the normalized score may be meaningless.\" % func_name,\n                RuntimeWarning,\n            )\n        assert np.isfinite(best_opt), \"Best performance found is not even finite.\"\n        logger.info(\"best %s %f\" % (func_name, best_opt))\n\n        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized\n        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to\n        # change this:\n        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))\n        assert pad < 0\n        best_opt = best_opt + pad\n        assert np.isfinite(best_opt), \"Best performance too close to limit of float range.\"\n        assert np.all(rand_perf > best_opt)\n        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt\n\n        random_evals = np.minimum(base_clip_val, random_evals)\n        assert np.all(np.isfinite(random_evals))\n        assert np.all(best_opt <= random_evals)\n\n        rand_perf = em.expected_min(random_evals, trials_grid)\n        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)\n        assert np.allclose(rand_perf, rand_perf_fixed)\n        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)\n        assert np.allclose(rand_perf, rand_perf_fixed)\n        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed\n    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)\n    validate(baseline_ds)\n    return baseline_ds\n\n\ndef do_baseline(args):  # pragma: io\n    \"\"\"Alternate entry into the program without calling the actual main.\n    \"\"\"\n    # Load in the eval data and sanity check\n    perf_ds, meta = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL_RESULTS)\n    
logger.info(\"Meta data from source file: %s\" % str(meta[\"args\"]))\n\n    D = OrderedDict()\n    for kk in perf_ds:\n        perf_da = perf_ds[kk]\n        D[(kk,)] = compute_baseline(perf_da)\n    baseline_ds = ds_concat(D, dims=(cc.OBJECTIVE,))\n\n    # Keep in same order for cleanliness\n    baseline_ds = baseline_ds.sel({cc.OBJECTIVE: list(perf_ds)})\n    assert list(perf_ds) == baseline_ds.coords[cc.OBJECTIVE].values.tolist()\n\n    # Could optionally remove this once we think things have enough tests\n    for kk in D:\n        assert baseline_ds.sel({cc.OBJECTIVE: kk[0]}, drop=True).identical(D[kk])\n\n    # Now dump the results\n    XRSerializer.save_derived(baseline_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)\n\n\ndef main():\n    \"\"\"See README for instructions on calling baseline.\n    \"\"\"\n    description = \"Aggregate the baselines for later analysis in benchmark\"\n    args = parse_args(general_parser(description))\n\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    do_baseline(args)\n    logger.info(\"done\")\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: main\n"
  },
  {
    "path": "bayesmark/experiment_db_init.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Tool to create a new database for results. This is just a wrapper on the serializer init call.\n\"\"\"\nimport logging\n\nimport bayesmark.cmd_parse as cmd\nfrom bayesmark.cmd_parse import CmdArgs\nfrom bayesmark.constants import EXP_VARS\nfrom bayesmark.serialize import XRSerializer\n\nEXIST_OK = True\n\nlogger = logging.getLogger(__name__)\n\n\ndef main():\n    \"\"\"See README for instructions on calling db_init.\n    \"\"\"\n    description = \"Initialize the directories for running the experiments\"\n    args = cmd.parse_args(cmd.general_parser(description))\n\n    assert not args[CmdArgs.dry_run], \"Dry run doesn't make any sense when building dirs\"\n\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=EXIST_OK)\n\n    logger.info(\"done\")\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: main\n"
  },
  {
    "path": "bayesmark/experiment_launcher.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Launch studies in separate studies or do dry run to build jobs file with lists of commands to run.\n\"\"\"\nimport json\nimport logging\nimport random as pyrandom\nimport uuid as pyuuid\nimport warnings\nfrom itertools import product\nfrom subprocess import TimeoutExpired, call\n\nimport numpy as np\n\nimport bayesmark.cmd_parse as cmd\nfrom bayesmark.builtin_opt.config import CONFIG\nfrom bayesmark.cmd_parse import CMD_STR, CmdArgs, serializable_dict\nfrom bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, EXP_VARS, METRICS, MODEL_NAMES, PY_INTERPRETER\nfrom bayesmark.data import METRICS_LOOKUP, get_problem_type\nfrom bayesmark.np_util import random as np_random\nfrom bayesmark.np_util import random_seed, strat_split\nfrom bayesmark.path_util import absopen\nfrom bayesmark.serialize import XRSerializer\nfrom bayesmark.util import range_str, shell_join, str_join_safe, strict_sorted\n\n# How much of uuid to put in job name to avoid name clashes\nUUID_JOB_CHARS = 7\n# Warning: this name is also specified in setup.py, and violates the DRY principle. 
So if it gets changed in setup.py,\n# it must also be changed here!\nEXPERIMENT_ENTRY = \"bayesmark-exp\"\n\nlogger = logging.getLogger(__name__)\n\n\ndef _is_arg_safe(ss):\n    \"\"\"Check if `str` is safe as argument to `argparse`.\"\"\"\n    if len(ss) == 0:\n        return False\n    safe = ss[0] != \"-\"\n    return safe\n\n\ndef arg_safe_str(val):\n    \"\"\"Cast value as `str`, raise error if not safe as argument to `argparse`.\"\"\"\n    ss = str(val)\n    if not _is_arg_safe(ss):\n        raise ValueError(\"%s is not safe for argparse\" % ss)\n    return ss\n\n\ndef gen_commands(args, opt_file_lookup, run_uuid):\n    \"\"\"Generator providing commands to launch processes for experiments.\n\n    Parameters\n    ----------\n    args : dict(CmdArgs, [int, str])\n        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments\n        passed to this program.\n    opt_file_lookup : dict(str, str)\n        Mapping from method name to filename containing wrapper class for the method.\n    run_uuid : uuid.UUID\n        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is\n        deterministic provided the same `run_uuid`.\n\n    Yields\n    ------\n    iteration_key : (str, str, str, str)\n        Tuple containing ``(trial, classifier, data, optimizer)`` to index the experiment.\n    full_cmd : tuple(str)\n        Strings containing command and arguments to run a process with experiment. Join with whitespace or use\n        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means it\n        will default to ``.`` if the command is executed. 
As such, the command assumes it is executed with\n        ``--opt-root`` as the working directory.\n    \"\"\"\n    args_to_pass_thru = [CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db]\n    # This could be made simpler and avoid if statement if we just always pass dataroot, even if no custom data used.\n    if args[CmdArgs.data_root] is not None:\n        args_to_pass_thru.append(CmdArgs.data_root)\n\n    # Possibilities to iterate over. Put them in sorted order just for good measure.\n    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else args[CmdArgs.classifier])\n    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else args[CmdArgs.data])\n    o_list = strict_sorted(\n        list(opt_file_lookup.keys()) + list(CONFIG.keys())\n        if args[CmdArgs.optimizer] is None\n        else args[CmdArgs.optimizer]\n    )\n    assert all(\n        ((optimizer in opt_file_lookup) or (optimizer in CONFIG)) for optimizer in o_list\n    ), \"unknown optimizer in optimizer list\"\n\n    m_set = set(METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])\n    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in METRICS_LOOKUP.items()}\n    assert all(\n        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list\n    ), \"At one metric needed for each problem type of data sets\"\n\n    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list, o_list)  # iterate all combos\n    for rep, classifier, data, optimizer in G:\n        _, rep_str = rep\n        problem_type = get_problem_type(data)\n        for metric in m_lookup[problem_type]:\n            # Get a reproducible string based (conditioned on having same (run uuid), but should also never give\n            # a duplicate (unless we force the same run uuid twice).\n            iteration_key = (rep_str, classifier, data, optimizer, metric)\n            iteration_id = str_join_safe(ARG_DELIM, iteration_key)\n 
           sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex\n\n            # Build the argument list for subproc, passing some args thru\n            cmd_args_pass_thru = [[CMD_STR[vv][0], arg_safe_str(args[vv])] for vv in args_to_pass_thru]\n            # Technically, the optimizer is not actually needed here for non-built in optimizers because it is already\n            # specified via the entry point: optimizer_wrapper_file\n            cmd_args = [\n                [CMD_STR[CmdArgs.classifier][0], arg_safe_str(classifier)],\n                [CMD_STR[CmdArgs.data][0], arg_safe_str(data)],\n                [CMD_STR[CmdArgs.optimizer][0], arg_safe_str(optimizer)],\n                [CMD_STR[CmdArgs.uuid][0], arg_safe_str(sub_uuid)],\n                [CMD_STR[CmdArgs.metric][0], arg_safe_str(metric)],\n            ]\n            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))\n            logger.info(\" \".join(cmd_args))\n\n            # The experiment command without the arguments\n            if optimizer in CONFIG:  # => built in optimizer wrapper\n                experiment_cmd = (EXPERIMENT_ENTRY,)\n            else:\n                optimizer_wrapper_file = opt_file_lookup[optimizer]\n                assert optimizer_wrapper_file.endswith(\".py\"), \"optimizer wrapper should a be .py file\"\n                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)\n\n            # Check arg safe again, odd elements in list need to be argsafe\n            assert all((_is_arg_safe(ss) == (ii % 2 == 1)) for ii, ss in enumerate(cmd_args))\n\n            full_cmd = experiment_cmd + cmd_args\n            yield iteration_key, full_cmd\n\n\ndef dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):\n    \"\"\"Write to buffer description of commands for running all experiments.\n\n    This function is almost pure by writing to a buffer, but it could be switched to a generator.\n\n    Parameters\n    ----------\n    args : dict(CmdArgs, [int, 
str])\n        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments\n        passed to this program.\n    opt_file_lookup : dict(str, str)\n        Mapping from method name to filename containing wrapper class for the method.\n    run_uuid : uuid.UUID\n        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is\n        deterministic provided the same `run_uuid`.\n    fp : writable buffer\n        File handle to write out sequence of commands to execute (broken into jobs on each line) to execute all the\n        experiments (possibly each job in parallel).\n    random : RandomState\n        Random stream to use for reproducibility.\n    \"\"\"\n    assert args[CmdArgs.n_jobs] > 0, \"Must have non-zero jobs for dry run\"\n\n    # Taking in file pointer since then we can test without actual file. Could also build generator that returns lines\n    # to write.\n    manual_setup_info = XRSerializer.init_db_manual(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS)\n    warnings.warn(manual_setup_info, UserWarning)\n\n    # Get the commands\n    dry_run_commands = {}\n    G = gen_commands(args, opt_file_lookup, run_uuid)\n    for (_, _, _, optimizer, _), full_cmd in G:\n        cmd_str = shell_join(full_cmd)\n        dry_run_commands.setdefault(optimizer, []).append(cmd_str)\n\n    # Make sure we never have any empty jobs, which is a waste\n    n_commands = sum(len(v) for v in dry_run_commands.values())\n    n_jobs = min(args[CmdArgs.n_jobs], n_commands)\n\n    # Would prob also work with pyrandom, but only tested np random so far\n    subcommands = strat_split(list(dry_run_commands.values()), n_jobs, random=random)\n    # Make sure have same commands overall, delete once we trust strat_split\n    assert sorted(np.concatenate(subcommands)) == sorted(sum(list(dry_run_commands.values()), []))\n\n    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]\n\n    # Include 
comments as reproducibility lines\n    args_str = serializable_dict(args)\n    fp.write(\"# running: %s\\n\" % str(args_str))\n    fp.write(\"# cmd: %s\\n\" % cmd.cmd_str())\n    for ii, ii_str in range_str(n_jobs):\n        assert len(subcommands[ii]) > 0\n        fp.write(\"job_%s_%s %s\\n\" % (job_suffix, ii_str, \" && \".join(subcommands[ii])))\n\n\ndef real_run(args, opt_file_lookup, run_uuid, timeout=None):  # pragma: io\n    \"\"\"Run sequence of independent experiments to fully run the benchmark.\n\n    This uses `subprocess` to launch a separate process (in serial) for each experiment.\n\n    Parameters\n    ----------\n    args : dict(CmdArgs, [int, str])\n        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments\n        passed to this program.\n    opt_file_lookup : dict(str, str)\n        Mapping from method name to filename containing wrapper class for the method.\n    run_uuid : uuid.UUID\n        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. 
This function is\n        deterministic provided the same `run_uuid`.\n    timeout : int\n        Max seconds per experiment\n    \"\"\"\n    args[CmdArgs.db] = XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=True)\n    logger.info(\"Supply --db %s to append to this experiment or reproduce jobs file.\" % args[CmdArgs.db])\n\n    # Get and run the commands in a sub-process\n    counter = 0\n    G = gen_commands(args, opt_file_lookup, run_uuid)\n    for _, full_cmd in G:\n        try:\n            status = call(full_cmd, shell=False, cwd=args[CmdArgs.optimizer_root], timeout=timeout)\n            if status != 0:\n                raise ChildProcessError(\"status code %d returned from:\\n%s\" % (status, \" \".join(full_cmd)))\n        except TimeoutExpired:\n            logger.info(f\"Experiment timeout after {timeout} seconds.\")\n            print(json.dumps({\"experiment_timeout_exception\": \" \".join(full_cmd)}))\n\n        counter += 1\n    logger.info(f\"Benchmark script ran {counter} studies successfully.\")\n\n\ndef main():\n    \"\"\"See README for instructions on calling launcher.\n    \"\"\"\n    description = \"Launch series of studies across functions and optimizers\"\n    args = cmd.parse_args(cmd.launcher_parser(description))\n\n    logger.setLevel(logging.INFO)  # Note this is the module-wide logger\n    if args[CmdArgs.verbose]:\n        logger.addHandler(logging.StreamHandler())\n\n    # Get optimizer settings, says which file to call for each optimizer\n    settings = cmd.load_optimizer_settings(args[CmdArgs.optimizer_root])\n    opt_file_lookup = {optimizer: wrapper_file for optimizer, (wrapper_file, _) in settings.items()}\n\n    # Setup uuid\n    if args[CmdArgs.uuid] is None:\n        args[CmdArgs.uuid] = pyuuid.uuid4().hex  # debatable if uuid1 or uuid4 is better here\n    else:\n        warnings.warn(\n            \"User UUID supplied. This is only desired for debugging. 
Careless use could lead to study id conflicts.\",\n            UserWarning,\n        )\n    run_uuid = pyuuid.UUID(hex=args[CmdArgs.uuid])\n    assert run_uuid.hex == args[CmdArgs.uuid]\n    logger.info(\"Supply --uuid %s to reproduce this run.\" % run_uuid.hex)\n\n    # Log all the options\n    print(\"Launcher options (JSON):\\n\")\n    print(json.dumps({\"bayesmark-launch-args\": cmd.serializable_dict(args)}))\n    print(\"\\n\")\n\n    # Set the master seed (derive from the uuid we just setup)\n    pyrandom.seed(run_uuid.int)\n    np.random.seed(random_seed(pyrandom))\n\n    # Now run it, either to dry run file or executes sub-processes\n    if args[CmdArgs.dry_run]:\n        with absopen(args[CmdArgs.jobs_file], \"w\") as fp:\n            dry_run(args, opt_file_lookup, run_uuid, fp)\n    else:\n        timeout = args[CmdArgs.timeout] if args[CmdArgs.timeout] > 0 else None\n        real_run(args, opt_file_lookup, run_uuid, timeout)\n\n    logger.info(\"done\")\n\n\nif __name__ == \"__main__\":\n    main()  # pragma: main\n"
  },
  {
    "path": "bayesmark/np_util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities to that could be included in `numpy` but aren't.\n\"\"\"\nimport numpy as np\n\n# np seed must be in [0, 2**32 - 1] = [0, uint32 max]\nSEED_MAX_INCL = np.iinfo(np.uint32).max\n\n# Access default numpy rng in way that is short and sphinx friendly\nrandom = np.random.random.__self__\n\n\ndef random_seed(random=random):\n    \"\"\"Draw a random seed compatible with :class:`numpy:numpy.random.RandomState`.\n\n    Parameters\n    ----------\n    random : :class:`numpy:numpy.random.RandomState`\n        Random stream to use to draw the random seed.\n\n    Returns\n    -------\n    seed : int\n        Seed for a new random stream in ``[0, 2**32-1)``.\n    \"\"\"\n    # np randint is exclusive on the high value, py randint is inclusive. We\n    # must use inclusive limit here to work with both. We are missing one\n    # possibility here (2**32-1), but I don't think that matters.\n    seed = random.randint(0, SEED_MAX_INCL)\n    return seed\n\n\ndef shuffle_2d(X, random=random):\n    \"\"\"Generalization of :func:`numpy:numpy.random.shuffle` of 2D array.\n\n    Performs in-place shuffling of `X`. So, it has no return value.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n, m)\n        Array-like 2D data to shuffle in place. 
Shuffles order of rows and order of elements within a row.\n    random : :class:`numpy:numpy.random.RandomState`\n        Random stream to use to draw the random seed.\n    \"\"\"\n    random.shuffle(X)\n    for rr in X:\n        random.shuffle(rr)\n\n\ndef strat_split(X, n_splits, inplace=False, random=random):\n    \"\"\"Make a stratified random split of items.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n, m)\n        Data we would like to split randomly into groups. We should get the same number +/-1 of elements from each row\n        in each group.\n    n_splits : int\n        How many groups we want to split into.\n    inplace : bool\n        If true, this function will cause in place modifications to `X`.\n    random : :class:`numpy:numpy.random.RandomState`\n        Random stream to use for reproducibility.\n\n    Returns\n    -------\n    Y : list(:class:`numpy:numpy.ndarray`)\n        Stratified split of `X` where each row of `Y` contains the same number +/-1 of elements from each row of `X`.\n        Must be a list of arrays since each row may have a different length.\n    \"\"\"\n    # Arguably, this function could go in stats\n    assert np.ndim(X) == 2\n    assert n_splits > 0\n\n    if not inplace:\n        X = np.array(X, copy=True)\n\n    shuffle_2d(X, random=random)\n    # Note this is like X.T.ravel()\n    Y = np.array_split(np.ravel(X, order=\"F\"), n_splits)\n\n    # Just for good measure make sure this is shuffled too, prob not needed.\n    shuffle_2d(Y, random=random)\n    return Y\n\n\ndef isclose_lte(x, y):\n    \"\"\"Check that less than or equal to (lte, ``x <= y``) is approximately true between all elements of `x` and `y`.\n\n    This is similar to :func:`numpy:numpy.allclose` for equality. 
Shapes of all input variables must be broadcast\n    compatible.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray`\n        Lower limit in ``<=`` check.\n    y : :class:`numpy:numpy.ndarray`\n        Upper limit in ``<=`` check.\n\n    Returns\n    -------\n    lte : bool\n        True if ``x <= y`` is approximately true element-wise.\n    \"\"\"\n    # Use np.less_equal to ensure always np type consistently\n    lte = np.less_equal(x, y) | np.isclose(x, y)\n    return lte\n\n\ndef clip_chk(x, lb, ub, allow_nan=False):\n    \"\"\"Clip all elements of `x` to be between `lb` and `ub` like :func:`numpy:numpy.clip`, but also check\n    :func:`numpy:numpy.isclose`.\n\n    Shapes of all input variables must be broadcast compatible.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray`\n        Array containing elements to clip.\n    lb : :class:`numpy:numpy.ndarray`\n        Lower limit in clip.\n    ub : :class:`numpy:numpy.ndarray`\n        Upper limit in clip.\n    allow_nan : bool\n        If true, we allow ``nan`` to be present in `x` without raising an error.\n\n    Returns\n    -------\n    x : :class:`numpy:numpy.ndarray`\n        An array with the elements of `x`, but where values < `lb` are replaced with `lb`, and those > `ub` with `ub`.\n    \"\"\"\n    assert np.all(lb <= ub)  # np.clip does not do this check\n\n    x = np.asarray(x)\n\n    # These are asserts not exceptions since clip_chk is mostly used internally.\n    if allow_nan:\n        assert np.all(isclose_lte(lb, x) | np.isnan(x))\n        assert np.all(isclose_lte(x, ub) | np.isnan(x))\n    else:\n        assert np.all(isclose_lte(lb, x))\n        assert np.all(isclose_lte(x, ub))\n    x = np.clip(x, lb, ub)\n    return x\n\n\ndef snap_to(x, fixed_val=None):\n    \"\"\"Snap input `x` to the `fixed_val` unless `fixed_val` is `None`, where `x` is returned.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray`\n        Array containing elements 
to snap.\n    fixed_val : :class:`numpy:numpy.ndarray` or None\n        Values to be returned if `x` is close, otherwise an error is raised. If `fixed_val` is `None`, `x` is returned.\n\n    Returns\n    -------\n    fixed_val : :class:`numpy:numpy.ndarray`\n        Snapped to value of `x`.\n    \"\"\"\n    if fixed_val is None:\n        return x\n\n    # Include == for discrete types where allclose doesn't work\n    if not (np.all(x == fixed_val) or np.allclose(x, fixed_val)):\n        raise ValueError(\"Expected fixed value %s, got %s.\" % (repr(fixed_val), repr(x)))\n\n    assert np.all(x == fixed_val) or np.allclose(x, fixed_val)\n    fixed_val = np.broadcast_to(fixed_val, np.shape(x))\n    return fixed_val\n\n\ndef linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True):\n    \"\"\"Linearly transform all elements of `X`, bounded between `lb0` and `ub0`, to be between `lb1` and `ub1`.\n\n    Shapes of all input variables must be broadcast compatible.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray`\n        Array containing elements to rescale.\n    lb0 : :class:`numpy:numpy.ndarray`\n        Current lower bound of `X`.\n    ub0 : :class:`numpy:numpy.ndarray`\n        Current upper bound of `X`.\n    lb1 : :class:`numpy:numpy.ndarray`\n        Desired lower bound of `X`.\n    ub1 : :class:`numpy:numpy.ndarray`\n        Desired upper bound of `X`.\n    enforce_bounds : bool\n        If True, perform input bounds check (and clipping if slight violation) on the input `X` and again on the\n        output. 
This argument is not meant to be vectorized like the other input variables.\n\n    Returns\n    -------\n    X : :class:`numpy:numpy.ndarray`\n        Elements of input `X` after linear rescaling.\n    \"\"\"\n    assert np.all(np.isfinite(lb0))\n    assert np.all(np.isfinite(lb1))\n    assert np.all(np.isfinite(ub0))\n    assert np.all(np.isfinite(ub1))\n    assert np.all(lb0 < ub0)\n    assert np.all(lb1 <= ub1)\n\n    m = np.true_divide(ub1 - lb1, ub0 - lb0)\n    assert np.all(m >= 0)\n\n    if enforce_bounds:\n        X = clip_chk(X, lb0, ub0)  # This will flag any non-finite X input.\n        X = clip_chk(m * (X - lb0) + lb1, lb1, ub1)\n    else:\n        X = m * (X - lb0) + lb1\n    return X\n\n\ndef argmin_2d(X):\n    \"\"\"Take the arg minimum of a 2D array.\"\"\"\n    assert X.size > 0, \"argmin of empty array not defined\"\n\n    ii, jj = np.unravel_index(X.argmin(), X.shape)\n    return ii, jj\n\n\ndef cummin(x_val, x_key):\n    \"\"\"Get the cumulative minimum of `x_val` when ranked according to `x_key`.\n\n    Parameters\n    ----------\n    x_val : :class:`numpy:numpy.ndarray` of shape (n, d)\n        The array to get the cumulative minimum of along axis 0.\n    x_key : :class:`numpy:numpy.ndarray` of shape (n, d)\n        The array for ranking elements as to what is the minimum.\n\n    Returns\n    -------\n    c_min : :class:`numpy:numpy.ndarray` of shape (n, d)\n        The cumulative minimum array.\n    \"\"\"\n    assert x_val.shape == x_key.shape\n    assert x_val.ndim == 2\n    assert not np.any(np.isnan(x_key)), \"cummin not defined for nan key\"\n\n    n, _ = x_val.shape\n\n    xm = np.minimum.accumulate(x_key, axis=0)\n    idx = np.maximum.accumulate((x_key <= xm) * np.arange(n)[:, None])\n    c_min = np.take_along_axis(x_val, idx, axis=0)\n    return c_min\n"
  },
  {
    "path": "bayesmark/path_util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Utilities handy for manipulating paths that have extra checks not included in `os.path`.\n\"\"\"\nimport os.path\nimport warnings\n\n\ndef abspath(path, verify=True):  # pragma: io\n    \"\"\"Combo of :func:`os.path.abspath` and :func:`os.path.expanduser` that will also check existence of directory.\n\n    Parameters\n    ----------\n    path : str\n        Relative path string that can also contain home directories, e.g., ``\"~/git/\"``.\n    verify : bool\n        If true, verifies that the directory exists. Raises an assertion failure if it does not exist.\n\n    Returns\n    -------\n    path : str\n        Absolute version of input path.\n    \"\"\"\n    path = os.path.abspath(os.path.expanduser(path))\n    if verify:\n        assert os.path.isdir(path), \"directory does not exist: %s\" % path\n    return path\n\n\ndef absopen(path, mode):  # pragma: io\n    \"\"\"Safe version of the built in :func:`open` that only opens absolute paths.\n\n    Parameters\n    ----------\n    path : str\n        Absolute path. 
An assertion failure is raised if it is not absolute.\n    mode : str\n        Open mode, any mode understood by the built in :func:`open`, e.g., ``\"r\"`` or ``\"w\"``.\n\n    Returns\n    -------\n    f : file handle\n        File handle open to use.\n    \"\"\"\n    assert os.path.isabs(path), \"Only allowing opening of absolute paths for safety.\"\n    f = open(path, mode)\n    return f\n\n\ndef _join_safe(*args):  # pragma: io\n    \"\"\"Helper routine with commonalities between `join_safe_r` and `join_safe_w`.\n    \"\"\"\n    assert len(args) >= 2\n    path, fname = args[:-1], args[-1]\n\n    path = os.path.join(*path)  # Put together the dir\n    path = abspath(path, verify=True)  # Make sure dir is abs, and exists\n\n    assert os.path.basename(fname) == fname, \"Expected basename got %s\" % fname\n    fname = os.path.join(path, fname)  # Put on the filename, must be abs\n    # Could check abs again if really wanted to be safe\n    return fname\n\n\ndef join_safe_r(*args):  # pragma: io\n    \"\"\"Safe version of :func:`os.path.join` that checks resulting path is absolute and the file exists for reading.\n\n    Parameters\n    ----------\n    *args : str\n        varargs for parts of path to combine. The last argument must be a file name.\n\n    Returns\n    -------\n    fname : str\n        Absolute path to filename.\n    \"\"\"\n    fname = _join_safe(*args)\n    assert os.path.isfile(fname)  # Check it exists\n    return fname\n\n\ndef join_safe_w(*args):  # pragma: io\n    \"\"\"Safe version of :func:`os.path.join` that checks resulting path is absolute.\n\n    Because this routine is for writing, if the file already exists, a warning is raised.\n\n    Parameters\n    ----------\n    *args : str\n        varargs for parts of path to combine. 
The last argument must be a file name.\n\n    Returns\n    -------\n    fname : str\n        Absolute path to filename.\n    \"\"\"\n    fname = _join_safe(*args)\n    # Give a warning if it exists\n    if os.path.isfile(fname):\n        warnings.warn(\"file already exists: %s\" % fname, RuntimeWarning)\n    return fname\n"
  },
  {
    "path": "bayesmark/quantiles.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Compute quantiles and confidence intervals.\n\"\"\"\nimport numpy as np\nimport scipy.stats as ss\n\nfrom bayesmark.np_util import isclose_lte\n\n\ndef ensure_shape(x, y):\n    \"\"\"Util to broadcast on var to another but only when shape is different.\n\n    This way we don't convert scalar into array type unnecessarily.\n    \"\"\"\n    shape_y = np.shape(y)\n    if np.shape(x) == shape_y:\n        return x\n    return np.broadcast_to(x, shape_y)\n\n\ndef order_stats(X):\n    \"\"\"Compute order statistics on sample `X`.\n\n    Follows convention that order statistic 1 is minimum and statistic n is maximum. Therefore, array elements ``0``\n    and ``n+1`` are ``-inf`` and ``+inf``.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for order statistics. Can be vectorized. 
Must be sortable data type (which is almost everything).\n\n    Returns\n    -------\n    o_stats : :class:`numpy:numpy.ndarray` of shape (n+2,)\n        Order statistics on `X`.\n    \"\"\"\n    assert np.ndim(X) >= 1\n    # NaN is not allowed since it does not have well defined order.\n    assert not np.any(np.isnan(X))\n\n    X_shape = np.shape(X)\n    inf_pad = np.full(X_shape[:-1] + (1,), np.inf)\n\n    o_stats = np.concatenate((-inf_pad, np.sort(X, axis=-1), inf_pad), axis=-1)\n    return o_stats\n\n\ndef _quantile(n, q):\n    idx = np.ceil(n * q).astype(int)\n    return idx\n\n\ndef quantile(X, q):\n    \"\"\"Computes `q` th quantile of `X`.\n\n    Similar to :func:`numpy:numpy.percentile` except that it matches the mathematical definition of a quantile *and*\n    `q` is scaled in (0,1) rather than (0,100).\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).\n    q : float\n        Quantile to compute, must be in (0, 1). 
Can be vectorized.\n\n    Returns\n    -------\n    estimate : dtype of `X`, scalar\n        Empirical `q` quantile from sample `X`.\n    \"\"\"\n    assert np.ndim(X) >= 1\n    # We could robustify things to allow the edge cases, but maybe later\n    assert np.all(0 < q) and np.all(q < 1)\n    # Currently don't support broadcasting both at same time\n    assert np.ndim(X) == 1 or np.ndim(q) == 0\n\n    n = X.shape[-1]\n    idx = _quantile(n, q)\n\n    o_stats = order_stats(X)\n    estimate = o_stats[..., idx]\n    return estimate\n\n\ndef _quantile_CI(n, q, alpha):\n    # Use in case there is -inf case from being at extreme of distn\n    idx_lower = np.fmax(0, ss.binom.ppf(alpha / 2.0, n, q)).astype(int)\n    assert np.all(isclose_lte(ss.binom.cdf(idx_lower - 1, n, q), alpha / 2.0))\n    assert np.all(isclose_lte(alpha / 2.0, ss.binom.cdf(idx_lower, n, q)))\n    assert np.all(0 <= idx_lower) and np.all(idx_lower <= n + 1)\n\n    idx_upper = np.fmax(0, ss.binom.isf(alpha / 2.0, n, q)).astype(int) + 1\n    assert np.all(isclose_lte(ss.binom.sf(idx_upper - 1, n, q), alpha / 2.0))\n    assert np.all(isclose_lte(alpha / 2.0, ss.binom.sf(idx_upper - 2, n, q)))\n    assert np.all(isclose_lte(1 - (alpha / 2.0), ss.binom.cdf(idx_upper - 1, n, q)))\n    assert np.all(isclose_lte(ss.binom.cdf(idx_upper - 2, n, q), 1 - (alpha / 2.0)))\n    assert np.all(0 <= idx_upper) and np.all(idx_upper <= n + 1)\n\n    C = ss.binom.cdf(idx_upper - 1, n, q) - ss.binom.cdf(idx_lower - 1, n, q)\n    assert np.all(isclose_lte(1.0 - alpha, C))\n\n    return idx_lower, idx_upper\n\n\ndef quantile_CI(X, q, alpha=0.05):\n    \"\"\"Calculate CI on `q` quantile from same `X` using nonparametric estimation from order statistics.\n\n    This will have alpha level of at most `alpha` due to the discrete nature of order statistics.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for quantile estimation. Can be vectorized. 
Must be sortable data type (which is almost everything).\n    q : float\n        Quantile to compute, must be in (0, 1). Can be vectorized.\n    alpha : float\n        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.\n\n    Returns\n    -------\n    LB : dtype of `X`, scalar\n        Lower end on CI\n    UB : dtype of `X`, scalar\n        Upper end on CI\n    \"\"\"\n    assert np.ndim(X) >= 1\n    # We could robustify things to allow the edge cases, but maybe later\n    assert np.all(0 < q) and np.all(q < 1)\n    assert np.all(0 < alpha) and np.all(alpha < 1)\n    # Currently don't support broadcasting both at same time\n    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(alpha) == 0)\n\n    n = X.shape[-1]\n    idx_lower, idx_upper = _quantile_CI(n, q, alpha)\n\n    o_stats = order_stats(X)\n    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]\n    return LB, UB\n\n\ndef max_quantile_CI(X, q, m, alpha=0.05):\n    \"\"\"Calculate CI on `q` quantile of distribution on max of `m` iid samples using a data set `X`.\n\n    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the\n    discrete nature of order statistics.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).\n    q : float\n        Quantile to compute, must be in (0, 1). Can be vectorized.\n    m : int\n        Compute statistics for distribution on max over `m` samples. Must be ``>= 1``. Can be vectorized.\n    alpha : float\n        False positive rate we allow for CI, must be in (0, 1). 
Can be vectorized.\n\n    Returns\n    -------\n    estimate : dtype of `X`, scalar\n        Best estimate on `q` quantile on max over `m` iid samples.\n    LB : dtype of `X`, scalar\n        Lower end on CI\n    UB : dtype of `X`, scalar\n        Upper end on CI\n    \"\"\"\n    # X and alpha used/checked below in quantile_CI routine.\n    # We could robustify things to allow the edge cases, but maybe later\n    assert np.all(0 < q) and np.all(q < 1)\n    # Could check int but if someone wants to interpolate, we will let them.\n    assert np.all(m >= 1)\n    # Currently don't support broadcasting both at same time\n    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(q) == 0 and np.ndim(alpha) == 0)\n\n    q = q ** (1.0 / m)\n    o_stats = order_stats(X)\n\n    n = X.shape[-1]\n    idx = _quantile(n, q)\n    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)\n\n    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]\n    # Might need to broadcast estimate out if vectorization is in alpha\n    estimate = ensure_shape(o_stats[..., idx], LB)\n    return estimate, LB, UB\n\n\ndef min_quantile_CI(X, q, m, alpha=0.05):\n    \"\"\"Calculate confidence interval on `q` quantile of distribution on min of `m` iid samples using a data set `X`.\n\n    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the\n    discrete nature of order statistics.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).\n    q : float\n        Quantile to compute, must be in (0, 1). Can be vectorized.\n    m : int\n        Compute statistics for distribution on min over `m` samples. Must be ``>= 1``. Can be vectorized.\n    alpha : float\n        False positive rate we allow for CI, must be in (0, 1). 
Can be vectorized.\n\n    Returns\n    -------\n    estimate : dtype of `X`, scalar\n        Best estimate on `q` quantile on min over `m` iid samples.\n    LB : dtype of `X`, scalar\n        Lower end on CI\n    UB : dtype of `X`, scalar\n        Upper end on CI\n    \"\"\"\n    # X and alpha used/checked below in quantile_CI routine.\n    # We could robustify things to allow the edge cases, but maybe later\n    assert np.all(0 < q) and np.all(q < 1)\n    # Could check int but if someone wants to interp, we will let them.\n    assert np.all(m >= 1)\n    # Currently don't support broadcasting both at same time\n    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(q) == 0 and np.ndim(alpha) == 0)\n\n    # This might have numerics issues for small q\n    q = 1.0 - (1.0 - q) ** (1.0 / m)\n    o_stats = order_stats(X)\n\n    n = X.shape[-1]\n    idx = _quantile(n, q)\n    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)\n\n    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]\n    # Might need to broadcast estimate out if vectorization is in alpha\n    estimate = ensure_shape(o_stats[..., idx], LB)\n    return estimate, LB, UB\n\n\ndef quantile_and_CI(X, q, alpha=0.05):\n    \"\"\"Calculate CI on `q` quantile from same `X` using nonparametric estimation from order statistics.\n\n    This will have alpha level of at most `alpha` due to the discrete nature of order statistics.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n,)\n        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).\n    q : float\n        Quantile to compute, must be in (0, 1). Can be vectorized.\n    alpha : float\n        False positive rate we allow for CI, must be in (0, 1). 
Can be vectorized.\n\n    Returns\n    -------\n    estimate : dtype of `X`, scalar\n        Empirical `q` quantile from sample `X`.\n    LB : dtype of `X`, scalar\n        Lower end on CI\n    UB : dtype of `X`, scalar\n        Upper end on CI\n    \"\"\"\n    # This routine is mostly just a wrapper routine\n    estimate, LB, UB = max_quantile_CI(X, q=q, m=1, alpha=alpha)\n    return estimate, LB, UB\n"
  },
  {
    "path": "bayesmark/random_search.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"A baseline random search in our standardized optimizer interface. Useful for baselines.\n\"\"\"\nimport numpy as np\n\nfrom bayesmark import np_util\nfrom bayesmark.space import JointSpace\n\n\ndef suggest_dict(X, y, meta, n_suggestions=1, random=np_util.random):\n    \"\"\"Stateless function to create suggestions for next query point in random search optimization.\n\n    This implements the API for general structures of different data types.\n\n    Parameters\n    ----------\n    X : list(dict)\n        Places where the objective function has already been evaluated. Not actually used in random search.\n    y : :class:`numpy:numpy.ndarray`, shape (n,)\n        Corresponding values where objective has been evaluated. Not actually used in random search.\n    meta : dict(str, dict)\n        Configuration of the optimization variables. 
See API description.\n    n_suggestions : int\n        Desired number of parallel suggestions in the output\n    random : :class:`numpy:numpy.random.RandomState`\n        Optionally pass in random stream for reproducibility.\n\n    Returns\n    -------\n    next_guess : list(dict)\n        List of `n_suggestions` suggestions to evaluate the objective function.\n        Each suggestion is a dictionary where each key corresponds to a parameter being optimized.\n    \"\"\"\n    # Warp and get bounds\n    space_x = JointSpace(meta)\n    X_warped = space_x.warp(X)\n    bounds = space_x.get_bounds()\n    _, n_params = _check_x_y(X_warped, y, allow_impute=True)\n    lb, ub = _check_bounds(bounds, n_params)\n\n    # Get the suggestion\n    suggest_x = random.uniform(lb, ub, size=(n_suggestions, n_params))\n\n    # Unwarp\n    next_guess = space_x.unwarp(suggest_x)\n    return next_guess\n\n\ndef _check_x_y(X, y, allow_impute=False):  # pragma: validator\n    \"\"\"Input validation for `suggest` routine.\"\"\"\n    if not (np.ndim(X) == 2):\n        raise ValueError(\"X must be 2-dimensional got %s.\" % str(np.shape(X)))\n    n_obs, n_params = np.shape(X)\n\n    assert n_params >= 1, \"We do not support suggest on empty space.\"\n\n    if not (np.shape(y) == (n_obs,)):\n        raise ValueError(\"y must be %s not %s.\" % (str((n_obs,)), str(np.shape(y))))\n\n    if not np.all(np.isfinite(X)):\n        raise ValueError(\"X must be finite.\")\n\n    n_real_obs = n_obs\n    if allow_impute:\n        if not np.all(np.isfinite(y) | np.isnan(y)):\n            raise ValueError(\"y can't contain infs even with data imputation.\")\n        n_real_obs = np.sum(np.isfinite(y))\n    else:\n        if not np.all(np.isfinite(y)):\n            raise ValueError(\"y must be finite when data imputation not used.\")\n\n    return n_real_obs, n_params\n\n\ndef _check_bounds(bounds, n_params):  # pragma: validator\n    \"\"\"Input validation for `suggest` routine.\"\"\"\n    if not 
(np.shape(bounds) == (n_params, 2)):\n        raise ValueError(\"bounds must have shape %s not %s.\" % (str((n_params, 2)), str(np.shape(bounds))))\n\n    lb, ub = np.asarray(bounds).T\n    if not (np.all(np.isfinite(lb)) and np.all(np.isfinite(ub))):\n        raise ValueError(\"bounds must be finite.\")\n    if not (np.all(lb <= ub)):\n        raise ValueError(\"lower bound must be less than upper bound.\")\n    return lb, ub\n"
  },
  {
    "path": "bayesmark/serialize.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"A serialization abstraction layer (SAL) to save and load experimental results. All IO of experimental results should\ngo through this module. This makes changing the backend (between different databases) transparent to the benchmark code.\n\"\"\"\nimport json\nimport os\nimport uuid\nfrom abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom tempfile import mkdtemp\n\nimport xarray as xr\nfrom pathvalidate.argparse import validate_filename, validate_filepath\n\nfrom bayesmark.path_util import join_safe_r, join_safe_w\nfrom bayesmark.util import chomp, str_join_safe\n\nNEWLINE = \"\\n\"  # Just to be explicit, in case this ever gets run on Windows\nPREFIX_FMT = \"bo_%Y%m%d_%H%M%S_\"  # The format we use for generating a new database name if none is specified\n\n_XR_EXT = \".json\"  # Extension we use for dumping xr.Dataset variables\n_LOG_EXT = \".log\"  # Extension to reccomend for logging files\n_DERIVED_DIR = \"derived\"  # The folder for dervied variables (datasets)\n_LOGGING_DIR = \"log\"  # The folder to reccomend for logging\n_SETUP_STR = \"\"\"\nUser must ensure\n%s\nexists, and setup folder using\nmkdir %s\nUser must ensure equal reps of each optimizer for unbiased results.\"\"\"\n\n\nclass Serializer(ABC):\n    \"\"\"Abstract base class for the serialization abstraction layer.\n    \"\"\"\n\n    @staticmethod\n    
@abstractmethod\n    def init_db(db_root, keys, db=None, exist_ok=True):\n        \"\"\"Initialize a \"database\" for storing data at the specified location.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        keys : list(str)\n            The variable names (or keys) we will store in the database for non-derived data.\n        db : str\n            The name of the database. If ``None``, a non-conflicting name will be generated.\n        exist_ok : bool\n            If true, do not raise an error if this database already exists.\n\n        Returns\n        -------\n        db : str\n            The name of the database.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def get_keys(db_root, db):\n        \"\"\"List the non-derived keys available in the database.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n\n        Returns\n        -------\n        keys : list(str)\n            The variable names (or keys) in the database for non-derived data.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def get_derived_keys(db_root, db):\n        \"\"\"List the derived keys currently available in the database.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n\n        Returns\n        -------\n        keys : list(str)\n            The variable names (or keys) in the database for derived data.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def get_uuids(db_root, db, key):\n        \"\"\"List the UUIDs for the versions of a variable (non-derived key) available in the database.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        
db : str\n            The name of the database.\n        key : str\n            The variable name in the database for non-derived data.\n\n        Returns\n        -------\n        uuids : list(uuid.UUID)\n            The UUIDs for the versions of this key.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def save(data, meta, db_root, db, key, uuid_):\n        \"\"\"Abstract method for saving experimental data, details require the type of `data`.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def load(db_root, db, key, uuid_):\n        \"\"\"Abstract method for loading experimental data, details require the type of `data`.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def save_derived(data, meta, db_root, db, key):\n        \"\"\"Abstract method for saving derived data, details require the type of `data`.\n        \"\"\"\n        pass\n\n    @staticmethod\n    @abstractmethod\n    def load_derived(db_root, db, key):\n        \"\"\"Abstract method for loading derived data, details require the type of `data`.\n        \"\"\"\n        pass\n\n\nclass XRSerializer(Serializer):\n    \"\"\"Serialization layer when saving and loading `xarray` datasets (currently) as `json`.\n    \"\"\"\n\n    def init_db(db_root, keys, db=None, exist_ok=True):  # pragma: io\n        XRSerializer._validate(db_root, keys, db)\n\n        if db is None:\n            folder_prefix = datetime.utcnow().strftime(PREFIX_FMT)\n            exp_subdir = mkdtemp(prefix=folder_prefix, dir=db_root)\n            db = os.path.basename(exp_subdir)\n            assert db.startswith(folder_prefix)\n            assert os.path.join(db_root, db) == exp_subdir\n        else:\n            exp_subdir = os.path.join(db_root, db)\n            os.makedirs(exp_subdir, exist_ok=exist_ok)\n\n        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)\n        for subd in subdirs:\n            os.makedirs(os.path.join(exp_subdir, 
subd), exist_ok=exist_ok)\n\n        return db\n\n    def init_db_manual(db_root, keys, db):\n        \"\"\"Instruction for how one would manually initialize the \"database\" on another system.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        keys : list(str)\n            The variable names (or keys) we will store in the database for non-derived data.\n        db : str\n            The name of the database.\n\n        Returns\n        -------\n        manual_setup_info : str\n            The setup instructions.\n        \"\"\"\n        XRSerializer._validate(db_root, keys, db)\n        assert db is not None, \"Must specify db name to setup manually.\"\n\n        exp_subdir = os.path.join(db_root, db)\n        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)\n        manual_setup_info = _SETUP_STR % (exp_subdir, str_join_safe(\" \", subdirs))\n        return manual_setup_info\n\n    def get_keys(db_root, db):  # pragma: io\n        XRSerializer._validate(db_root, keys=(), db=db)\n\n        keys = sorted(os.listdir(os.path.join(db_root, db)))\n        keys.remove(_DERIVED_DIR)\n        keys.remove(_LOGGING_DIR)\n        return keys\n\n    def get_derived_keys(db_root, db):  # pragma: io\n        XRSerializer._validate(db_root, keys=(), db=db)\n\n        fnames = sorted(os.listdir(os.path.join(db_root, db, _DERIVED_DIR)))\n        keys = [XRSerializer._fname_to_key(ff) for ff in fnames]\n        return keys\n\n    def get_uuids(db_root, db, key):  # pragma: io\n        XRSerializer._validate(db_root, keys=[key], db=db)\n\n        fnames = sorted(os.listdir(os.path.join(db_root, db, key)))\n        uuids = [XRSerializer._fname_to_uuid(ff) for ff in fnames]\n        return uuids\n\n    def save(data, meta, db_root, db, key, uuid_):  # pragma: io\n        \"\"\"Save a dataset under a key name in the database.\n\n        Parameters\n        ----------\n        data : :class:`xarray:xarray.Dataset`\n   
         An :class:`xarray:xarray.Dataset` variable we would like to store as non-derived data from an experiment.\n        meta : json-serializable\n            Associated meta-data with the experiment. This can be anything json serializable.\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n        key : str\n            The variable name in the database for the data.\n        uuid_ : uuid.UUID\n            The UUID to represent the version of this variable we are storing.\n        \"\"\"\n        XRSerializer._validate(db_root, keys=[key], db=db)\n\n        fname = XRSerializer._uuid_to_fname(uuid_)\n        path = (db_root, db, key, fname)\n        with open(join_safe_w(*path), \"w\") as f:\n            _dump_xr(f, ds=data, meta=meta)\n\n    def load(db_root, db, key, uuid_):  # pragma: io\n        \"\"\"Load a dataset under a key name in the database. This is the inverse of :func:`.save`.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n        key : str\n            The variable name in the database for the data.\n        uuid_ : uuid.UUID\n            The UUID to represent the version of this variable we want to load.\n\n        Returns\n        -------\n        data : :class:`xarray:xarray.Dataset`\n            An :class:`xarray:xarray.Dataset` variable for the non-derived data from an experiment.\n        meta : json-serializable\n            Associated meta-data with the experiment. 
This can be anything json serializable.\n        \"\"\"\n        XRSerializer._validate(db_root, keys=[key], db=db)\n\n        fname = XRSerializer._uuid_to_fname(uuid_)\n        path = (db_root, db, key, fname)\n        with open(join_safe_r(*path), \"r\") as f:\n            ds, meta = _load_xr(f)\n        return ds, meta\n\n    def save_derived(data, meta, db_root, db, key):  # pragma: io\n        \"\"\"Save a dataset under a key name in the database as derived data.\n\n        Parameters\n        ----------\n        data : :class:`xarray:xarray.Dataset`\n            An :class:`xarray:xarray.Dataset` variable we would like to store as derived data from experiments.\n        meta : json-serializable\n            Associated meta-data with the experiments. This can be anything json serializable.\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n        key : str\n            The variable name in the database for the data.\n        \"\"\"\n        XRSerializer._validate(db_root, keys=[key], db=db)\n\n        fname = XRSerializer._key_to_fname(key)\n        path = (db_root, db, _DERIVED_DIR, fname)\n        with open(join_safe_w(*path), \"w\") as f:\n            _dump_xr(f, ds=data, meta=meta)\n\n    def load_derived(db_root, db, key):  # pragma: io\n        \"\"\"Load a dataset under a key name in the database as derived data. This is the inverse of :func:`.save_derived`.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n        key : str\n            The variable name in the database for the data.\n\n        Returns\n        -------\n        data : :class:`xarray:xarray.Dataset`\n            An :class:`xarray:xarray.Dataset` variable for the derived data from experiments.\n        meta : json-serializable\n            Associated meta-data with the experiments. 
This can be anything json serializable.\n        \"\"\"\n        XRSerializer._validate(db_root, keys=[key], db=db)\n\n        fname = XRSerializer._key_to_fname(key)\n        path = (db_root, db, _DERIVED_DIR, fname)\n        with open(join_safe_r(*path), \"r\") as f:\n            data, meta = _load_xr(f)\n        return data, meta\n\n    def logging_path(db_root, db, uuid_):  # pragma: io\n        \"\"\"Get an absolute path for logging from an experiment given its UUID.\n\n        Parameters\n        ----------\n        db_root : str\n            Absolute path to the database.\n        db : str\n            The name of the database.\n        uuid_ : uuid.UUID\n            The UUID to represent this experiment.\n\n        Returns\n        -------\n        logfile : str\n            Absolute path suitable for logging in this experiment.\n        \"\"\"\n        XRSerializer._validate(db_root, keys=(), db=db)\n        assert isinstance(uuid_, uuid.UUID)\n\n        fname = uuid_.hex + _LOG_EXT\n        logfile = join_safe_w(db_root, db, _LOGGING_DIR, fname)\n        return logfile\n\n    def _fname_to_uuid(fname):\n        uuid_ = uuid.UUID(chomp(fname, _XR_EXT))\n        return uuid_\n\n    def _uuid_to_fname(uuid_):\n        assert isinstance(uuid_, uuid.UUID)  # This can be eliminated once we use type hints\n\n        fname = uuid_.hex + _XR_EXT\n        return fname\n\n    def _key_to_fname(key):\n        fname = key + _XR_EXT\n        return fname\n\n    def _fname_to_key(fname):\n        key = chomp(fname, _XR_EXT)\n        return key\n\n    def _validate(db_root, keys=(), db=None):\n        validate_filepath(db_root, platform=\"auto\")\n        assert os.path.isabs(db_root), \"db_root must be absolute path\"\n\n        if db is not None:\n            validate_filename(db, platform=\"universal\")\n\n        for kk in keys:\n            validate_filename(kk, platform=\"universal\")\n\n\ndef _dump_xr(f, ds, meta):  # pragma: io\n    \"\"\"Helper routine to 
`XRSerializer.save` and `XRSerializer.save_derived`.\n    \"\"\"\n    assert isinstance(ds, xr.Dataset)  # Requiring Dataset and not DataArray for now\n\n    meta_json = json.dumps(meta)  # meta can be anything that json can handle\n    # JSON dumps seems pretty good about escaping, but check to be sure\n    assert NEWLINE not in meta_json\n\n    # Built in json dumper doesn't allow us to only line break on top-level, so we manually do this for now\n    f.write('{\"meta\": %s,' % meta_json)\n    f.write(NEWLINE)\n    f.write('\"data\": ')\n\n    json.dump(ds.to_dict(), f)\n    f.write(\"}\")\n    f.write(NEWLINE)\n\n\ndef _load_xr(f):  # pragma: io\n    \"\"\"Helper routine to `XRSerializer.load` and `XRSerializer.load_derived`.\n    \"\"\"\n    all_json = json.load(f)\n    meta = all_json.pop(\"meta\")\n    ds = xr.Dataset.from_dict(all_json.pop(\"data\"))\n    return ds, meta\n"
  },
  {
    "path": "bayesmark/signatures.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Routines to compute and compare the \"signatures\" of objective functions. These are useful to make sure two different\nstudies were actually optimizing the same objective function (even if they say the same test case in the meta-data).\n\"\"\"\nimport warnings\n\nimport numpy as np\nimport pandas as pd\n\nimport bayesmark.random_search as rs\n\n# How many points to probe the function to get the signature\nN_SUGGESTIONS = 5\n\n\ndef get_func_signature(f, api_config):\n    \"\"\"Get the function signature for an objective function in an experiment.\n\n    Parameters\n    ----------\n    f : typing.Callable\n        The objective function we want to compute the signature of. This function must take inputs in the form of\n        ``dict(str, object)`` with one dictionary key per variable, and provide `float` as the output.\n    api_config : dict(str, dict)\n        Configuration of the optimization variables. See API description.\n\n    Returns\n    -------\n    signature_x : list(dict(str, object)) of shape (n_suggest,)\n        The input locations probed on signature call.\n    signature_y : list(float) of shape (n_suggest,)\n        The objective function values at the inputs points. 
This is the real signature.\n    \"\"\"\n    # Make sure get same sequence on every call to be a signature\n    random = np.random.RandomState(0)\n\n    signature_x = rs.suggest_dict([], [], api_config, n_suggestions=N_SUGGESTIONS, random=random)\n\n    # For now, we only take the first output as the signature. We can generalize this later.\n    signature_y = [f(xx)[0] for xx in signature_x]\n    assert np.all(np.isfinite(signature_y)), \"non-finite values found in signature for function\"\n    return signature_x, signature_y\n\n\ndef analyze_signatures(signatures):\n    \"\"\"Analyze function signatures from the experiment.\n\n    Parameters\n    ----------\n    signatures : dict(str, list(list(float)))\n        The signatures should all be the same length, so it should be 2D array\n        like.\n\n    Returns\n    -------\n    sig_errs : :class:`pandas:pandas.DataFrame`\n        rows are test cases, columns are test points.\n    signatures_median : dict(str, list(float))\n        Median signature across all repetition per test case.\n    \"\"\"\n    sig_errs = {}\n    signatures_median = {}\n    for test_case, signature_y in signatures.items():\n        assert len(signature_y) > 0, \"signature with no cases found\"\n        assert np.all(np.isfinite(signature_y)), \"non-finite values found in signature for function\"\n\n        minval = np.min(signature_y, axis=0)\n        maxval = np.max(signature_y, axis=0)\n\n        if not np.allclose(minval, maxval):\n            # Arguably, the util should not raise the warning, and these should\n            # be raised on the outside, but let's do this for simplicity.\n            warnings.warn(\n                \"Signature diverged on %s betwen %s and %s\" % (test_case, str(minval), str(maxval)), RuntimeWarning\n            )\n        sig_errs[test_case] = maxval - minval\n        # ensure serializable using tolist\n        signatures_median[test_case] = np.median(signature_y, axis=0).tolist()\n\n    # Convert to pandas 
so easy to append margins with max, better for disp.\n    # If we let the user convert to pandas then we don't need dep on pandas.\n    sig_errs = pd.DataFrame(sig_errs).T\n    sig_errs.loc[\"max\", :] = sig_errs.max(axis=0)\n    sig_errs.loc[:, \"max\"] = sig_errs.max(axis=1)\n\n    return sig_errs, signatures_median\n\n\ndef analyze_signature_pair(signatures, signatures_ref):\n    \"\"\"Analyze a pair of signatures (often from two sets of experiments) and return the error between them.\n\n    Parameters\n    ----------\n    signatures : dict(str, list(float))\n        Signatures from set of experiments. The signatures must all be the same length, so it should be 2D array like.\n    signatures_ref : dict(str, list(float))\n        The signatures from a reference set of experiments. The keys in `signatures` must be a subset of the signatures\n        in `signatures_ref`.\n\n    Returns\n    -------\n    sig_errs : :class:`pandas:pandas.DataFrame`\n        rows are test cases, columns are test points.\n    signatures_median : dict(str, list(float))\n        Median signature across all repetition per test case.\n    \"\"\"\n    signatures_pair = {kk: [signatures[kk], signatures_ref[kk]] for kk in signatures}\n    sig_errs, signatures_pair = analyze_signatures(signatures_pair)\n    return sig_errs, signatures_pair\n"
  },
  {
    "path": "bayesmark/sklearn_funcs.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"\nRoutines to build a standardized interface to make `sklearn` hyper-parameter tuning problems look like an objective\nfunction.\n\nThis file mostly contains a dictionary collection of all sklearn test funcs.\n\nThe format of each element in `MODELS` is:\nmodel_name: (model_class, fixed_param_dict, search_param_api_dict)\n`model_name` is an arbitrary name to refer to a certain strategy.\nAt usage time, the optimizer instance is created using:\n``model_class(**kwarg_dict)``\nThe kwarg dict is `fixed_param_dict` + `search_param_dict`. The\n`search_param_dict` comes from a optimizer which is configured using the\n`search_param_api_dict`. 
See the API description for information on setting up\nthe `search_param_api_dict`.\n\"\"\"\nimport os.path\nimport pickle as pkl\nimport warnings\nfrom abc import ABC, abstractmethod\n\nimport numpy as np\nfrom sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, RandomForestClassifier, RandomForestRegressor\nfrom sklearn.linear_model import Lasso, LogisticRegression, Ridge\nfrom sklearn.metrics import get_scorer\nfrom sklearn.model_selection import cross_val_score, train_test_split\nfrom sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor\nfrom sklearn.neural_network import MLPClassifier, MLPRegressor\nfrom sklearn.svm import SVC, SVR\nfrom sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n\nfrom bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES, VISIBLE_TO_OPT\nfrom bayesmark.data import METRICS_LOOKUP, ProblemType, get_problem_type, load_data\nfrom bayesmark.path_util import absopen\nfrom bayesmark.space import JointSpace\nfrom bayesmark.util import str_join_safe\n\n# Using 3 would be faster, but 5 is the most realistic CV split (5-fold)\nCV_SPLITS = 5\n\n# We should add cat variables into some of these configurations but a lot of\n# the wrappers for the BO methods really have trouble with cat types.\n\n# kNN\nknn_cfg = {\n    \"n_neighbors\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (1, 25)},\n    \"p\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (1, 4)},\n}\n\n# SVM\nsvm_cfg = {\n    \"C\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1.0, 1e3)},\n    \"gamma\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-4, 1e-3)},\n    \"tol\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n}\n\n# DT\ndt_cfg = {\n    \"max_depth\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (1, 15)},\n    \"min_samples_split\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.99)},\n    \"min_samples_leaf\": {\"type\": \"real\", \"space\": \"logit\", 
\"range\": (0.01, 0.49)},\n    \"min_weight_fraction_leaf\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.49)},\n    \"max_features\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.99)},\n    \"min_impurity_decrease\": {\"type\": \"real\", \"space\": \"linear\", \"range\": (0.0, 0.5)},\n}\n\n# RF\nrf_cfg = {\n    \"max_depth\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (1, 15)},\n    \"max_features\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.99)},\n    \"min_samples_split\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.99)},\n    \"min_samples_leaf\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.49)},\n    \"min_weight_fraction_leaf\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.01, 0.49)},\n    \"min_impurity_decrease\": {\"type\": \"real\", \"space\": \"linear\", \"range\": (0.0, 0.5)},\n}\n\n# MLP with ADAM\nmlp_adam_cfg = {\n    \"hidden_layer_sizes\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (50, 200)},\n    \"alpha\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e1)},\n    \"batch_size\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (10, 250)},\n    \"learning_rate_init\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n    \"tol\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n    \"validation_fraction\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.1, 0.9)},\n    \"beta_1\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.5, 0.99)},\n    \"beta_2\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.9, 1.0 - 1e-6)},\n    \"epsilon\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-9, 1e-6)},\n}\n\n# MLP with SGD\nmlp_sgd_cfg = {\n    \"hidden_layer_sizes\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (50, 200)},\n    \"alpha\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e1)},\n    \"batch_size\": 
{\"type\": \"int\", \"space\": \"linear\", \"range\": (10, 250)},\n    \"learning_rate_init\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n    \"power_t\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.1, 0.9)},\n    \"tol\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n    \"momentum\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.001, 0.999)},\n    \"validation_fraction\": {\"type\": \"real\", \"space\": \"logit\", \"range\": (0.1, 0.9)},\n}\n\n# AdaBoostClassifier\nada_cfg = {\n    \"n_estimators\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (10, 100)},\n    \"learning_rate\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-4, 1e1)},\n}\n\n# lasso\nlasso_cfg = {\n    \"C\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n    \"intercept_scaling\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n}\n\n# linear\nlinear_cfg = {\n    \"C\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n    \"intercept_scaling\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n}\n\nMODELS_CLF = {\n    \"kNN\": (KNeighborsClassifier, {}, knn_cfg),\n    \"SVM\": (SVC, {\"kernel\": \"rbf\", \"probability\": True}, svm_cfg),\n    \"DT\": (DecisionTreeClassifier, {\"max_leaf_nodes\": None}, dt_cfg),\n    \"RF\": (RandomForestClassifier, {\"n_estimators\": 10, \"max_leaf_nodes\": None}, rf_cfg),\n    \"MLP-adam\": (MLPClassifier, {\"solver\": \"adam\", \"early_stopping\": True}, mlp_adam_cfg),\n    \"MLP-sgd\": (\n        MLPClassifier,\n        {\"solver\": \"sgd\", \"early_stopping\": True, \"learning_rate\": \"invscaling\", \"nesterovs_momentum\": True},\n        mlp_sgd_cfg,\n    ),\n    \"ada\": (AdaBoostClassifier, {}, ada_cfg),\n    \"lasso\": (\n        LogisticRegression,\n        {\"penalty\": \"l1\", \"fit_intercept\": True, \"solver\": \"liblinear\", \"multi_class\": \"ovr\"},\n        lasso_cfg,\n    ),\n    
\"linear\": (\n        LogisticRegression,\n        {\"penalty\": \"l2\", \"fit_intercept\": True, \"solver\": \"liblinear\", \"multi_class\": \"ovr\"},\n        linear_cfg,\n    ),\n}\n\n# For now, we will assume the default is to go thru all classifiers\nassert sorted(MODELS_CLF.keys()) == sorted(MODEL_NAMES)\n\nada_cfg_reg = {\n    \"n_estimators\": {\"type\": \"int\", \"space\": \"linear\", \"range\": (10, 100)},\n    \"learning_rate\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-4, 1e1)},\n}\n\nlasso_cfg_reg = {\n    \"alpha\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n    \"fit_intercept\": {\"type\": \"bool\"},\n    \"normalize\": {\"type\": \"bool\"},\n    \"max_iter\": {\"type\": \"int\", \"space\": \"log\", \"range\": (10, 5000)},\n    \"tol\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-5, 1e-1)},\n    \"positive\": {\"type\": \"bool\"},\n}\n\nlinear_cfg_reg = {\n    \"alpha\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-2, 1e2)},\n    \"fit_intercept\": {\"type\": \"bool\"},\n    \"normalize\": {\"type\": \"bool\"},\n    \"max_iter\": {\"type\": \"int\", \"space\": \"log\", \"range\": (10, 5000)},\n    \"tol\": {\"type\": \"real\", \"space\": \"log\", \"range\": (1e-4, 1e-1)},\n}\n\nMODELS_REG = {\n    \"kNN\": (KNeighborsRegressor, {}, knn_cfg),\n    \"SVM\": (SVR, {\"kernel\": \"rbf\"}, svm_cfg),\n    \"DT\": (DecisionTreeRegressor, {\"max_leaf_nodes\": None}, dt_cfg),\n    \"RF\": (RandomForestRegressor, {\"n_estimators\": 10, \"max_leaf_nodes\": None}, rf_cfg),\n    \"MLP-adam\": (MLPRegressor, {\"solver\": \"adam\", \"early_stopping\": True}, mlp_adam_cfg),\n    \"MLP-sgd\": (\n        MLPRegressor,  # regression crashes often with relu\n        {\n            \"activation\": \"tanh\",\n            \"solver\": \"sgd\",\n            \"early_stopping\": True,\n            \"learning_rate\": \"invscaling\",\n            \"nesterovs_momentum\": True,\n        },\n        mlp_sgd_cfg,\n    
),\n    \"ada\": (AdaBoostRegressor, {}, ada_cfg_reg),\n    \"lasso\": (Lasso, {}, lasso_cfg_reg),\n    \"linear\": (Ridge, {\"solver\": \"auto\"}, linear_cfg_reg),\n}\n\n# If both classifiers and regressors match MODEL_NAMES then the experiment\n# launcher can simply go thru the cartesian product and do all combos.\nassert sorted(MODELS_REG.keys()) == sorted(MODEL_NAMES)\n\n\nclass TestFunction(ABC):\n    \"\"\"Abstract base class for test functions in the benchmark. These do not need to be ML hyper-parameter tuning.\n    \"\"\"\n\n    def __init__(self):\n        \"\"\"Setup general test function for benchmark. We assume the test function knows the meta-data about the search\n        space, but is also stateless to fit modeling assumptions. To keep stateless, it does not do things like count\n        the number of function evaluations.\n        \"\"\"\n        # This will need to be set before using other routines\n        self.api_config = None\n\n    @abstractmethod\n    def evaluate(self, params):\n        \"\"\"Abstract method to evaluate the function at a parameter setting.\n        \"\"\"\n\n    def get_api_config(self):\n        \"\"\"Get the API config for this test problem.\n\n        Returns\n        -------\n        api_config : dict(str, dict(str, object))\n            The API config for the used model. 
See README for API description.\n        \"\"\"\n        assert self.api_config is not None, \"API config is not set.\"\n        return self.api_config\n\n\nclass SklearnModel(TestFunction):\n    \"\"\"Test class for sklearn classifier/regressor CV score objective functions.\n    \"\"\"\n\n    # Map our short names for metrics to the full length sklearn name\n    _METRIC_MAP = {\n        \"nll\": \"neg_log_loss\",\n        \"acc\": \"accuracy\",\n        \"mae\": \"neg_mean_absolute_error\",\n        \"mse\": \"neg_mean_squared_error\",\n    }\n\n    # This can be static and constant for now\n    objective_names = (VISIBLE_TO_OPT, \"generalization\")\n\n    def __init__(self, model, dataset, metric, shuffle_seed=0, data_root=None):\n        \"\"\"Build class that wraps sklearn classifier/regressor CV score for use as an objective function.\n\n        Parameters\n        ----------\n        model : str\n            Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is\n            classification or regression.\n        dataset : str\n            Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.\n        metric : str\n            Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict depending on if dataset is\n            classification or regression.\n        shuffle_seed : int\n            Random seed to use when splitting the data into train and validation in the cross-validation splits. This\n            is needed in order to keep the split constant across calls. 
Otherwise there would be extra noise in the\n            objective function for varying splits.\n        data_root : str\n            Root directory to look for all custom csv files.\n        \"\"\"\n        TestFunction.__init__(self)\n        data, target, problem_type = load_data(dataset, data_root=data_root)\n        assert problem_type in (ProblemType.clf, ProblemType.reg)\n        self.is_classifier = problem_type == ProblemType.clf\n\n        # Do some validation on loaded data\n        assert isinstance(data, np.ndarray)\n        assert isinstance(target, np.ndarray)\n        assert data.ndim == 2 and target.ndim == 1\n        assert data.shape[0] == target.shape[0]\n        assert data.size > 0\n        assert data.dtype == np.float_\n        assert np.all(np.isfinite(data))  # also catch nan\n        assert target.dtype == (np.int_ if self.is_classifier else np.float_)\n        assert np.all(np.isfinite(target))  # also catch nan\n\n        model_lookup = MODELS_CLF if self.is_classifier else MODELS_REG\n        base_model, fixed_params, api_config = model_lookup[model]\n\n        # New members for model\n        self.base_model = base_model\n        self.fixed_params = fixed_params\n        self.api_config = api_config\n\n        # Always shuffle your data to be safe. 
Use fixed seed for reprod.\n        self.data_X, self.data_Xt, self.data_y, self.data_yt = train_test_split(\n            data, target, test_size=0.2, random_state=shuffle_seed, shuffle=True\n        )\n\n        assert metric in METRICS, \"Unknown metric %s\" % metric\n        assert metric in METRICS_LOOKUP[problem_type], \"Incompatible metric %s with problem type %s\" % (\n            metric,\n            problem_type,\n        )\n        self.scorer = get_scorer(SklearnModel._METRIC_MAP[metric])\n\n    def evaluate(self, params):\n        \"\"\"Evaluate the sklearn CV objective at a particular parameter setting.\n\n        Parameters\n        ----------\n        params : dict(str, object)\n            The varying (non-fixed) parameter dict to the sklearn model.\n\n        Returns\n        -------\n        cv_loss : float\n            Average loss over CV splits for sklearn model when tested using the settings in params.\n        \"\"\"\n        params = dict(params)  # copy to avoid modification of original\n        params.update(self.fixed_params)  # add in fixed params\n\n        # now build the skl object\n        clf = self.base_model(**params)\n\n        assert np.all(np.isfinite(self.data_X)), \"all features must be finite\"\n        assert np.all(np.isfinite(self.data_y)), \"all targets must be finite\"\n\n        # Do the x-val, ignore user warn since we expect BO to try weird stuff\n        with warnings.catch_warnings():\n            warnings.filterwarnings(\"ignore\", category=UserWarning)\n            S = cross_val_score(clf, self.data_X, self.data_y, scoring=self.scorer, cv=CV_SPLITS)\n        # Take the mean score across all x-val splits\n        cv_score = np.mean(S)\n\n        # Now let's get the generalization error for same hypers\n        clf = self.base_model(**params)\n        clf.fit(self.data_X, self.data_y)\n        generalization_score = self.scorer(clf, self.data_Xt, self.data_yt)\n\n        # get_scorer makes everything a score not a 
loss, so we need to negate to get the loss back\n        cv_loss = -cv_score\n        assert np.isfinite(cv_loss), \"loss not even finite\"\n        generalization_loss = -generalization_score\n        assert np.isfinite(generalization_loss), \"loss not even finite\"\n\n        # Unbox to basic float to keep it simple\n        cv_loss = cv_loss.item()\n        assert isinstance(cv_loss, float)\n        generalization_loss = generalization_loss.item()\n        assert isinstance(generalization_loss, float)\n\n        # For now, score with same objective. We can later add generalization error\n        return cv_loss, generalization_loss\n\n    @staticmethod\n    def test_case_str(model, dataset, scorer):\n        \"\"\"Generate the combined test case string from model, dataset, and scorer combination.\"\"\"\n        test_case = str_join_safe(ARG_DELIM, (model, dataset, scorer))\n        return test_case\n\n    @staticmethod\n    def inverse_test_case_str(test_case):\n        \"\"\"Inverse of `test_case_str`.\"\"\"\n        model, dataset, scorer = test_case.split(ARG_DELIM)\n        assert test_case == SklearnModel.test_case_str(model, dataset, scorer)\n        return model, dataset, scorer\n\n\nclass SklearnSurrogate(TestFunction):\n    \"\"\"Test class for sklearn classifier/regressor CV score objective function surrogates.\n    \"\"\"\n\n    # This can be static and constant for now\n    objective_names = (VISIBLE_TO_OPT, \"generalization\")\n\n    def __init__(self, model, dataset, scorer, path):\n        \"\"\"Build class that wraps sklearn classifier/regressor CV score for use as an objective function surrogate.\n\n        Parameters\n        ----------\n        model : str\n            Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is\n            classification or regression.\n        dataset : str\n            Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.\n        
scorer : str\n            Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict depending on if dataset is\n            classification or regression.\n        path : str\n            Root directory to look for all pickle files.\n        \"\"\"\n        TestFunction.__init__(self)\n\n        # Find the space class, we could consider putting this in pkl too\n        problem_type = get_problem_type(dataset)\n        assert problem_type in (ProblemType.clf, ProblemType.reg)\n        _, _, self.api_config = MODELS_CLF[model] if problem_type == ProblemType.clf else MODELS_REG[model]\n        self.space = JointSpace(self.api_config)\n\n        # Load the pre-trained model\n        fname = SklearnModel.test_case_str(model, dataset, scorer) + \".pkl\"\n\n        if isinstance(path, bytes):\n            # This is for test-ability, we could use mock instead.\n            self.model = pkl.loads(path)\n        else:\n            path = os.path.join(path, fname)  # pragma: io\n            assert os.path.isfile(path), \"Model file not found: %s\" % path\n\n            with absopen(path, \"rb\") as f:  # pragma: io\n                self.model = pkl.load(f)  # pragma: io\n        assert callable(getattr(self.model, \"predict\", None))\n\n    def evaluate(self, params):\n        \"\"\"Evaluate the sklearn CV objective at a particular parameter setting.\n\n        Parameters\n        ----------\n        params : dict(str, object)\n            The varying (non-fixed) parameter dict to the sklearn model.\n\n        Returns\n        -------\n        overall_loss : float\n            Average loss over CV splits for sklearn model when tested using the settings in params.\n        \"\"\"\n        x = self.space.warp([params])\n        y, = self.model.predict(x)\n\n        assert y.shape == (len(self.objective_names),)\n        assert y.dtype.kind == \"f\"\n\n        assert np.all(-np.inf < y)  # Will catch nan too\n        y = tuple(y.tolist())  # Make consistent 
with SklearnModel typing\n        return y\n"
  },
  {
    "path": "bayesmark/space.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Do the conversion of search spaces into a normalized cartesian space.\n\"\"\"\nimport numpy as np\nfrom scipy.interpolate import interp1d\nfrom scipy.special import expit as logistic  # because nobody calls it expit\nfrom scipy.special import logit\n\nfrom bayesmark.np_util import clip_chk, snap_to\n\nWARPED_DTYPE = np.float_\nN_GRID_DEFAULT = 8\n\n# I can't make up mind of unicode or str is better wrt to Py 2/3 compatibility\n# ==> Just make a global constant and make sure it works either way.\n# Note: if we switch to np.str_, we will also need to update doc-strings!\nCAT_DTYPE = np.unicode_\nCAT_KIND = \"U\"\nCAT_NATIVE_DTYPE = str\n# Check to make sure consistent\nassert CAT_KIND == np.dtype(CAT_DTYPE).kind\n_infered = type(CAT_DTYPE(\"\").item())\nassert CAT_NATIVE_DTYPE == _infered\n\n# ============================================================================\n# These could go into util\n# ============================================================================\n\n\ndef unravel_index(dims):\n    \"\"\"Builds tuple of coordinate arrays to traverse an `numpy` array.\n\n    Wrapper around :func:`numpy:numpy.unravel_index` that avoids bug at corner case for ``dims=()``. The fix for this\n    has been merged into the numpy master branch Oct 18, 2017 so future numpy releases will make this wrapper not\n    needed. 
Otherwise, ``unravel_index(X.shape)`` is equivalent to: ``np.unravel_index(range(X.size), X.shape)``.\n\n    Parameters\n    ----------\n    dims : tuple(int)\n        The shape of the array to use for unraveling ``indices``.\n\n    Returns\n    -------\n    unraveled_coords : tuple(:class:`numpy:numpy.ndarray`)\n        Each array in the tuple has shape (n,) where ``n=np.prod(dims)``.\n\n    References\n    ----------\n    unravel_index(0, ()) should return () (Trac #2120) #580\n    https://github.com/numpy/numpy/issues/580\n    Allow `unravel_index(0, ())` to return () #9884\n    https://github.com/numpy/numpy/pull/9884\n    \"\"\"\n    size = np.prod(dims)\n    if dims == () or size == 0:  # The corner case\n        return ()\n\n    idx = np.unravel_index(range(np.prod(dims)), dims)\n    return idx\n\n\ndef encode(X, labels, assume_sorted=False, dtype=bool, assume_valid=False):\n    \"\"\"Perform one hot encoding of categorical data in :class:`numpy:numpy.ndarray` variable `X` of any dimension.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (...)\n        Categorical values of any standard type. Vectorized to work for any dimensional `X`.\n    labels : :class:`numpy:numpy.ndarray` of shape (n,)\n        Complete list of all possible labels. List is flattened if it is not already 1 dimensional.\n    assume_sorted : bool\n        If true, assume labels is already sorted and unique. This saves the computational cost of calling\n        :func:`numpy:numpy.unique`.\n    dtype : type\n        Desired data of feature array. One-hot is most logically `bool`, but feature matrices are usually `float`.\n    assume_valid : bool\n        If true, assume all element of `X` are in the list `labels`. This saves the computational cost of verifying\n        `X` are in `labels`. 
If true and a non-label `X` occurs this routine will silently give bogus result.\n\n    Returns\n    -------\n    Y : :class:`numpy:numpy.ndarray` of shape (..., n)\n        One-hot encoding of `X`. Extra dimension is appended at end for the one-hot vector. It has data type `dtype`.\n    \"\"\"\n    X = np.asarray(X)\n    labels = np.asarray(labels) if assume_sorted else np.unique(labels)\n    check_array(labels, \"labels\", pre=True, ndim=1, min_size=1)\n\n    idx = np.searchsorted(labels, X)\n    # If x is not even in labels then this will fail. This is not ValueError\n    # because the user explicitly asked for this using argument assume_valid.\n    assert assume_valid or np.all(np.asarray(labels[idx]) == X)\n\n    # This is using some pro np indexing technique to vectorize across all\n    # possible input dimensions for X in the same code.\n    Y = np.zeros(X.shape + (len(labels),), dtype=dtype)\n    Y[unravel_index(X.shape) + (idx.ravel(),)] = True\n    return Y\n\n\ndef decode(Y, labels, assume_sorted=False):\n    \"\"\"Perform inverse of one-hot encoder `encode`.\n\n    Parameters\n    ----------\n    Y : :class:`numpy:numpy.ndarray` of shape (..., n)\n        One-hot encoding of categorical data `X`. Extra dimension is appended at end for the one-hot vector. Maximum\n        element is taken if there is more than one non-zero entry in one-hot vector.\n    labels : :class:`numpy:numpy.ndarray` of shape (n,)\n        Complete list of all possible labels. List is flattened if it is not already 1-dimensional.\n    assume_sorted : bool\n        If true, assume labels is already sorted and unique. 
This saves the computational cost of calling\n        :func:`numpy:numpy.unique`.\n\n    Returns\n    -------\n    X : :class:`numpy:numpy.ndarray` of shape (...)\n        Categorical values corresponding to one-hot encoded `Y`.\n    \"\"\"\n    Y = np.asarray(Y)\n    labels = np.asarray(labels) if assume_sorted else np.unique(labels)\n    check_array(labels, \"labels\", pre=True, ndim=1, min_size=1)\n    check_array(Y, \"Y\", pre=True, shape_endswith=(len(labels),))\n\n    idx = np.argmax(Y, axis=-1)\n    X = labels[idx]\n    return X\n\n\ndef _error(msg, pre=False):  # pragma: validator\n    \"\"\"Helper routine for :func:`.check_array`.\n\n    This could probably be made cleaner by using raise to create the assert.\n    \"\"\"\n    if pre:\n        raise ValueError(msg)\n    else:\n        assert False, msg\n\n\ndef check_array(\n    X,\n    name,\n    pre=False,\n    ndim=None,\n    shape=None,\n    shape_endswith=(),\n    min_size=0,\n    dtype=None,\n    kind=None,\n    allow_infinity=True,\n    allow_nan=True,\n    unsorted=True,\n    whitelist=None,\n):  # pragma: validator\n    \"\"\"Like :func:`sklearn:sklearn.utils.check_array` but better.\n\n    Check specified property of input array `X`. If an argument is not specified it passes by default.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray`\n        `numpy` array we want to validate.\n    name : str\n        Human readable name of variable to refer to it in error messages. Note this can include spaces unlike simply\n        using the variable name.\n    pre : bool\n        If true, interpret this check as validating pre-conditions to a function and will raise a `ValueError` if a\n        check fails. 
If false, assumes we are checking post-conditions and will raise an assertion failure.\n    ndim : int\n        Expected value of ``X.ndim``.\n    shape : tuple(int)\n        Expected value of ``X.shape``.\n    shape_endswith : tuple(int)\n        Expected that ``X.shape`` ends with `shape_endswith`. This is useful in broadcasting where extra dimensions\n        might be added on.\n    min_size : int\n        Minimum value for ``X.size``\n    dtype : dtype\n        Expected value of ``X.dtype``.\n    kind : str\n        Expected value of ``X.dtype.kind``. This is `'f'` for `float`, `'i'` for `int`, and so on.\n    allow_infinity : bool\n        If false, the check fails when `X` contains inf or ``-inf``.\n    allow_nan : bool\n        If false, the check fails when `X` contains a ``NaN``.\n    unsorted : bool\n        If false, the check fails when `X` is not in sorted order. This is designed to even work with string arrays.\n    whitelist : :class:`numpy:numpy.ndarray`\n        Array containing allowed values for `X`. 
If an element of `X` is not found in `whitelist`, the check fails.\n    \"\"\"\n    if (ndim is not None) and X.ndim != ndim:\n        _error(\"Expected %d dimensions for %s, got %d\" % (ndim, name, X.ndim), pre)\n\n    if (shape is not None) and X.shape != shape:\n        _error(\"Expected shape %s for %s, got %s\" % (str(shape), name, str(X.shape)), pre)\n\n    if len(shape_endswith) > 0:\n        if X.shape[-len(shape_endswith) :] != shape_endswith:\n            if len(shape_endswith) == 1:\n                _error(\"Expected shape (..., %d) for %s, got %s\" % (shape_endswith[0], name, str(X.shape)), pre)\n            else:\n                _error(\"Expected shape (..., %s for %s, got %s\" % (str(shape_endswith)[1:], name, str(X.shape)), pre)\n\n    if (min_size > 0) and (X.size < min_size):\n        _error(\"%s needs at least %d elements, it has %d\" % (name, min_size, X.size), pre)\n\n    if (dtype is not None) and X.dtype != np.dtype(dtype):\n        _error(\"Expected dtype %s for %s, got %s\" % (str(np.dtype(dtype)), name, str(X.dtype)), pre)\n\n    if (kind is not None) and X.dtype.kind != kind:\n        _error(\"Expected array with kind %s for %s, got %s\" % (kind, name, str(X.dtype.kind)), pre)\n\n    if (not allow_infinity) and np.any(np.abs(X) == np.inf):\n        _error(\"Infinity is not allowed in %s\" % name, pre)\n\n    if (not allow_nan) and np.any(np.isnan(X)):\n        _error(\"NaN is not allowed in %s\" % name, pre)\n\n    if whitelist is not None:\n        ok = np.all([xx in whitelist for xx in np.nditer(X, [\"zerosize_ok\"])])\n        if not ok:\n            _error(\"Expected all elements of %s to be in %s\" % (name, str(whitelist)), pre)\n\n    # Only do this check in 1D\n    if X.ndim == 1 and (not unsorted) and np.any(X[:-1] > X[1:]):\n        _error(\"Expected sorted input for %s\" % name, pre)\n\n\n# ============================================================================\n# Setup warping dictionaries\n# 
============================================================================\n\n\ndef identity(x):\n    \"\"\"Helper function that perform warping in linear space. Sort of a no-op.\n\n    Parameters\n    ----------\n    x : scalar\n        Input variable in linear space. Can be any numeric type and is vectorizable.\n\n    Returns\n    -------\n    y : scalar\n        Same as input `x`.\n    \"\"\"\n    y = x\n    return y\n\n\ndef bilog(x):\n    \"\"\"Bilog warping function. Extension of log to work with negative numbers.\n\n    ``Bilog(x) ~= log(x)`` for large `x` or ``-log(abs(x))`` if `x` is negative. However, the bias term ensures good\n    behavior near 0 and ``bilog(0) = 0``.\n\n    Parameters\n    ----------\n    x : scalar\n        Input variable in linear space. Can be any numeric type and is vectorizable.\n\n    Returns\n    -------\n    y : float\n        The bilog of `x`.\n    \"\"\"\n    y = np.sign(x) * np.log(1.0 + np.abs(x))\n    return y\n\n\ndef biexp(x):\n    \"\"\"Inverse of :func:`.bilog` function.\n\n    Parameters\n    ----------\n    x : scalar\n        Input variable in linear space. 
Can be any numeric type and is vectorizable.\n\n    Returns\n    -------\n    y : float\n        The biexp of `x`.\n    \"\"\"\n    y = np.sign(x) * (np.exp(np.abs(x)) - 1.0)\n    return y\n\n\nWARP_DICT = {\"linear\": identity, \"log\": np.log, \"logit\": logit, \"bilog\": bilog}\nUNWARP_DICT = {\"linear\": identity, \"log\": np.exp, \"logit\": logistic, \"bilog\": biexp}\n\n# ============================================================================\n# Setup spaces class hierarchy\n# ============================================================================\n\n\nclass Space(object):\n    \"\"\"Base class for all types of variables.\n    \"\"\"\n\n    def __init__(self, dtype, default_round, warp=\"linear\", values=None, range_=None):\n        \"\"\"Generic constructor of `Space` class.\n\n        Not intended to be called directly but instead by child classes. However, `Space` is not an abstract class and\n        will not give an error when instantiated.\n        \"\"\"\n        self.dtype = dtype\n        assert warp in WARP_DICT, \"invalid space %s, allowed spaces are: %s\" % (str(warp), str(WARP_DICT.keys()))\n        self.warp_f = WARP_DICT[warp]\n        self.unwarp_f = UNWARP_DICT[warp]\n\n        # Setup range and rounding if values is suplied\n        assert (values is None) != (range_ is None)\n        round_to_values = default_round\n        if range_ is None:  # => value is not None\n            # Debatable if unique should be done before or after cast. But I\n            # think after is better, esp. 
when changing precisions.\n            values = np.asarray(values, dtype=dtype)\n            values = np.unique(values)  # values now 1D ndarray no matter what\n            check_array(\n                values,\n                \"unique values\",\n                pre=True,\n                ndim=1,\n                dtype=dtype,\n                min_size=2,\n                allow_infinity=False,\n                allow_nan=False,\n            )\n\n            # Extrapolation might happen due to numerics in type conversions.\n            # Bounds checking is still done in validate routines.\n            round_to_values = interp1d(values, values, kind=\"nearest\", fill_value=\"extrapolate\")\n            range_ = (values[0], values[-1])\n        # Save values and rounding\n        # Values is either None or was validated inside if statement\n        self.values = values\n        self.round_to_values = round_to_values\n\n        # Note that if dtype=None that is the default for asarray.\n        range_ = np.asarray(range_, dtype=dtype)\n        check_array(range_, \"range\", pre=True, shape=(2,), dtype=dtype, unsorted=False)\n        # Save range info, with input validation and post validation\n        self.lower, self.upper = range_\n\n        # Convert to warped bounds too with lots of post validation\n        self.lower_warped, self.upper_warped = self.warp_f(range_[..., None]).astype(WARPED_DTYPE, copy=False)\n        check_array(\n            self.lower_warped,\n            \"warped lower bound %s(%.1f)\" % (warp, self.lower),\n            ndim=1,\n            pre=True,\n            dtype=WARPED_DTYPE,\n            allow_infinity=False,\n            allow_nan=False,\n        )\n        # Should never happen if warpers are strictly monotonic:\n        assert np.all(self.lower_warped <= self.upper_warped)\n\n        # Make sure a bit bigger to keep away from lower due to numerics\n        self.upper_warped = np.maximum(self.upper_warped, 
np.nextafter(self.lower_warped, np.inf))\n        check_array(\n            self.upper_warped,\n            \"warped upper bound %s(%.1f)\" % (warp, self.upper),\n            pre=True,\n            shape=self.lower_warped.shape,\n            dtype=WARPED_DTYPE,\n            allow_infinity=False,\n            allow_nan=False,\n        )\n        # Should never happen if warpers are strictly monotonic:\n        assert np.all(self.lower_warped < self.upper_warped)\n\n    def validate(self, X, pre=False):\n        \"\"\"Routine to validate inputs to warp.\n\n        This routine does not perform any checking on the dimensionality of `X` and is fully vectorized.\n        \"\"\"\n        X = np.asarray(X, dtype=self.dtype)\n\n        if self.values is None:\n            X = clip_chk(X, self.lower, self.upper)\n        else:\n            check_array(X, \"X\", pre=pre, whitelist=self.values)\n\n        return X\n\n    def validate_warped(self, X, pre=False):\n        \"\"\"Routine to validate inputs to unwarp. This routine is vectorized, but `X` must have at least 1-dimension.\n        \"\"\"\n        X = np.asarray(X, dtype=WARPED_DTYPE)\n        check_array(X, \"X\", pre=pre, shape_endswith=(len(self.lower_warped),))\n\n        X = clip_chk(X, self.lower_warped, self.upper_warped)\n        return X\n\n    def warp(self, X):\n        \"\"\"Warp inputs to a continuous space.\n\n        Parameters\n        ----------\n        X : :class:`numpy:numpy.ndarray` of shape (...)\n            Input variables to warp. This is vectorized to work in any dimension, but it must have the same type code\n            as the class, which is in `self.type_code`.\n\n        Returns\n        -------\n        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)\n            Warped version of input space. By convention there is an extra dimension on warped array.\n            Currently, ``m=1`` for all warpers. 
`X_w` will have a `float` type.\n        \"\"\"\n        X = self.validate(X, pre=True)\n\n        X_w = self.warp_f(X)\n        X_w = X_w[..., None]  # Convention is that warped has extra dim\n\n        X_w = self.validate_warped(X_w)  # Ensures dtype is WARPED_DTYPE\n        check_array(X_w, \"X\", ndim=X.ndim + 1, dtype=WARPED_DTYPE)\n        return X_w\n\n    def unwarp(self, X_w):\n        \"\"\"Inverse of `warp` function.\n\n        Parameters\n        ----------\n        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)\n            Warped version of input space. This is vectorized to work in any dimension. But, by convention, there is an\n            extra dimension on the warped array. Currently, the last dimension ``m=1`` for all warpers. `X_w` must be of\n            a `float` type.\n\n        Returns\n        -------\n        X : :class:`numpy:numpy.ndarray` of shape (...)\n            Unwarped version of `X_w`. `X` will have the same type code as the class, which is in `self.type_code`.\n        \"\"\"\n        X_w = self.validate_warped(X_w, pre=True)\n\n        X = clip_chk(self.unwarp_f(X_w[..., 0]), self.lower, self.upper)\n        X = self.round_to_values(X)\n\n        X = self.validate(X)  # Ensures dtype is self.dtype\n        check_array(X, \"X\", ndim=X_w.ndim - 1, dtype=self.dtype)\n        return X\n\n    def get_bounds(self):\n        \"\"\"Get bounds of the warped space.\n\n        Returns\n        -------\n        bounds : :class:`numpy:numpy.ndarray` of shape (D, 2)\n            Bounds in the warped space. 
First column is the lower bound and the second column is the upper bound.\n            Calling ``bounds.tolist()`` gives the bounds in the standard form expected by `scipy` optimizers:\n            ``[(lower_1, upper_1), ..., (lower_n, upper_n)]``.\n        \"\"\"\n        bounds = np.stack((self.lower_warped, self.upper_warped), axis=1)\n        check_array(bounds, \"bounds\", shape=(len(self.lower_warped), 2), dtype=WARPED_DTYPE)\n        return bounds\n\n    def grid(self, max_interp=N_GRID_DEFAULT):\n        \"\"\"Return grid spanning the original (unwarped) space.\n\n        Parameters\n        ----------\n        max_interp : int\n            The number of points to use in grid space when a range and not values are used to define the space.\n            Must be ``>= 0``.\n\n        Returns\n        -------\n        values : list\n            Grid spanning the original space. This is simply `self.values` if a grid has already been specified,\n            otherwise it is just grid across the range.\n        \"\"\"\n        values = self.values\n        if values is None:\n            vw = np.linspace(self.lower_warped, self.upper_warped, max_interp)\n            # Some spaces like int may result in duplicates after unwarping\n            # so we apply unique to avoid this. However this will usually be\n            # wasted computation.\n            values = np.unique(self.unwarp(vw[:, None]))\n            check_array(values, \"values\", ndim=1, dtype=self.dtype)\n\n        # Best to convert to list to make sure in native type\n        values = values.tolist()\n        return values\n\n\nclass Real(Space):\n    \"\"\"Space for transforming real variables to normalized space (after warping).\n    \"\"\"\n\n    def __init__(self, warp=\"linear\", values=None, range_=None):\n        \"\"\"Build Real space class.\n\n        Parameters\n        ----------\n        warp : {'linear', 'log', 'logit', 'bilog'}\n            Which warping type to apply to the space. 
The warping is applied in the original space. That is, in a space\n            with ``warp='log'`` and ``range_=(2.0, 10.0)``, the value 2.0 warps to ``log(2)``, not ``-inf`` as in some\n            other frameworks.\n        values : None or list(float)\n            Possible values for space to take. Values must be of `float` type.\n        range_ : None or :class:`numpy:numpy.ndarray` of shape (2,)\n            Array with (lower, upper) pair with limits of space. Note that one must specify `values` or `range_`, but\n            not both. `range_` must be composed of `float`.\n        \"\"\"\n        assert warp is not None, \"warp/space not specified for real\"\n        Space.__init__(self, np.float_, identity, warp, values, range_)\n\n\nclass Integer(Space):\n    \"\"\"Space for transforming integer variables to continuous normalized space.\n    \"\"\"\n\n    def __init__(self, warp=\"linear\", values=None, range_=None):\n        \"\"\"Build Integer space class.\n\n        Parameters\n        ----------\n        warp : {'linear', 'log', 'bilog'}\n            Which warping type to apply to the space. The warping is applied in the original space. That is, in a space\n            with ``warp='log'`` and ``range_=(2, 10)``, the value 2 warps to ``log(2)``, not ``-inf`` as in some other\n            frameworks. There are no settings with integers that are compatible with the logit warp.\n        values : None or list(int)\n            Possible values for space to take. Values must be of `int` type.\n        range_ : None or :class:`numpy:numpy.ndarray` of shape (2,)\n            Array with (lower, upper) pair with limits of space. Note that one must specify `values` or `range_`, but\n            not both. 
`range_` must be composed of `int`.\n        \"\"\"\n        assert warp is not None, \"warp/space not specified for int\"\n        Space.__init__(self, np.int_, np.round, warp, values, range_)\n\n\nclass Boolean(Space):\n    \"\"\"Space for transforming Boolean variables to continuous normalized space.\n    \"\"\"\n\n    def __init__(self, warp=None, values=None, range_=None):\n        \"\"\"Build Boolean space class.\n\n        Parameters\n        ----------\n        warp : None\n            Must be omitted or None, provided for consistency with other types.\n        values : None\n            Must be omitted or None, provided for consistency with other types.\n        range_ : None\n            Must be omitted or None, provided for consistency with other types.\n        \"\"\"\n        assert warp is None, \"cannot warp bool\"\n        assert (values is None) and (range_ is None), \"cannot pass in values or range for bool\"\n        self.dtype = np.bool_\n        self.warp_f = identity\n        self.unwarp_f = identity\n\n        self.values = np.array([False, True], dtype=np.bool_)\n        self.round_to_values = np.round\n\n        self.lower, self.upper = self.dtype(False), self.dtype(True)\n        self.lower_warped = np.array([0.0], dtype=WARPED_DTYPE)\n        self.upper_warped = np.array([1.0], dtype=WARPED_DTYPE)\n\n\nclass Categorical(Space):\n    \"\"\"Space for transforming categorical variables to continuous normalized space.\n    \"\"\"\n\n    def __init__(self, warp=None, values=None, range_=None):\n        \"\"\"Build Categorical space class.\n\n        Parameters\n        ----------\n        warp : None\n            Must be omitted or None, provided for consistency with other types.\n        values : list(str)\n            Possible values for space to take. Values must be unicode strings. 
Requiring type unicode (``'U'``) rather\n            than strings (``'S'``) corresponds to the native string type.\n        range_ : None\n            Must be omitted or None, provided for consistency with other types.\n        \"\"\"\n        assert warp is None, \"cannot warp cat\"\n        assert values is not None, \"must pass in explicit values for cat\"\n        assert range_ is None, \"cannot pass in range for cat\"\n\n        values = np.unique(values)  # values now 1D ndarray no matter what\n        check_array(values, \"values\", pre=True, ndim=1, kind=CAT_KIND, min_size=2)\n        self.values = values\n\n        self.dtype = CAT_DTYPE\n        # Debatable if decode should go in unwarp or round_to_values\n\n        self.warp_f = self._encode\n        self.unwarp_f = identity\n        self.round_to_values = self._decode\n\n        self.lower, self.upper = None, None  # Don't need them\n        self.lower_warped = np.zeros(len(values), dtype=WARPED_DTYPE)\n        self.upper_warped = np.ones(len(values), dtype=WARPED_DTYPE)\n\n    def _encode(self, x):\n        return encode(x, self.values, True, WARPED_DTYPE, True)\n\n    def _decode(self, x):\n        return decode(x, self.values, True)\n\n    def warp(self, X):\n        \"\"\"Warp inputs to a continuous space.\n\n        Parameters\n        ----------\n        X : :class:`numpy:numpy.ndarray` of shape (...)\n            Input variables to warp. This is vectorized to work in any dimension, but it must have the same\n            type code as the class, which is unicode (``'U'``) for the :class:`.Categorical` space.\n\n        Returns\n        -------\n        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)\n            Warped version of input space. By convention there is an extra dimension on warped array. The warped space\n            has a one-hot encoding and therefore `m` is the number of possible values in the space. 
`X_w` will have\n            a `float` type.\n        \"\"\"\n        X = self.validate(X, pre=True)\n\n        X_w = self.warp_f(X)\n\n        # Probably over kill to validate here too, but why not:\n        X_w = self.validate_warped(X_w)\n        check_array(X_w, \"X\", ndim=X.ndim + 1, dtype=WARPED_DTYPE)\n        return X_w\n\n    def unwarp(self, X_w):\n        \"\"\"Inverse of `warp` function.\n\n        Parameters\n        ----------\n        X_w : :class:`numpy:numpy.ndarray` of shape (..., m)\n            Warped version of input space. The warped space has a one-hot encoding and therefore `m` is the number of\n            possible values in the space. `X_w` will have a `float` type. Non-zero/one values are allowed in `X_w`.\n            The maximal element in the vector is taken as the encoded value.\n\n        Returns\n        -------\n        X : :class:`numpy:numpy.ndarray` of shape (...)\n            Unwarped version of `X_w`. `X` will have same type code as the :class:`.Categorical` class, which is\n            unicode (``'U'``).\n        \"\"\"\n        X_w = self.validate_warped(X_w, pre=True)\n\n        X = self.round_to_values(self.unwarp_f(X_w))\n\n        X = self.validate(X)\n        check_array(X, \"X\", ndim=X_w.ndim - 1, kind=CAT_KIND)\n        return X\n\n\n# Treat ordinal identically to categorical for now\nSPACE_DICT = {\"real\": Real, \"int\": Integer, \"bool\": Boolean, \"cat\": Categorical, \"ordinal\": Categorical}\n\n# ============================================================================\n# Setup code for joint spaces over multiple parameters with different configs\n# ============================================================================\n\n\nclass JointSpace(object):\n    \"\"\"Combination of multiple :class:`.Space` objectives to transform multiple variables at the same time (jointly).\n    \"\"\"\n\n    def __init__(self, meta):\n        \"\"\"Build Real space class.\n\n        Parameters\n        ----------\n        
meta : dict(str, dict)\n            Configuration of variables in joint space. See API description.\n        \"\"\"\n        assert len(meta) > 0  # Unclear what to do with empty space\n\n        # Lock in an order if not ordered dict, sorted helps reproducibility\n        self.param_list = sorted(meta.keys())\n\n        # Might as well pre-validate a bit here\n        for param, config in meta.items():\n            assert config[\"type\"] in SPACE_DICT, \"invalid input type %s\" % config[\"type\"]\n\n        spaces = {\n            param: SPACE_DICT[config[\"type\"]](\n                config.get(\"space\", None), config.get(\"values\", None), config.get(\"range\", None)\n            )\n            for param, config in meta.items()\n        }\n        self.spaces = spaces\n\n        self.blocks = np.cumsum([len(spaces[param].get_bounds()) for param in self.param_list])\n\n    def validate(self, X):\n        \"\"\"Raise `ValueError` if X does not match the format expected for a\n        joint space.\"\"\"\n        for record in X:\n            if self.param_list != sorted(record.keys()):\n                raise ValueError(\"Expected joint space keys %s, but got %s\", (self.param_list, sorted(record.keys())))\n            for param in self.param_list:\n                self.spaces[param].validate([record[param]], pre=True)\n        # Return X back so we have option to cast it to list or whatever later\n        return X\n\n    def warp(self, X):\n        \"\"\"Warp inputs to a continuous space.\n\n        Parameters\n        ----------\n        X : list(dict(str, object)) of shape (n,)\n            List of `n` points in the joint space to warp. Each list element is a dictionary where each key corresponds\n            to a variable in the joint space. 
Keys can be missing in the records and the corresponding warped variables\n            will be ``nan``.\n\n        Returns\n        -------\n        X_w : :class:`numpy:numpy.ndarray` of shape (n, m)\n            Warped version of input space. Result is 2D `float` np array. `n` is the number of input points, length\n            of `X`. `m` is the size of the joint warped space, which can be inferred by calling :func:`.get_bounds`.\n        \"\"\"\n        # It would be nice to have cleaner way to deal with this corner case\n        if len(X) == 0:\n            return np.zeros((0, self.blocks[-1]), dtype=WARPED_DTYPE)\n\n        X_w = [\n            np.concatenate(\n                [\n                    self.spaces[param].warp(record[param])\n                    if param in record\n                    else np.full(len(self.spaces[param].get_bounds()), np.nan)\n                    for param in self.param_list\n                ]\n            )\n            for record in X\n        ]\n        X_w = np.stack(X_w, axis=0)\n        check_array(X_w, \"X\", shape=(len(X), self.blocks[-1]), dtype=WARPED_DTYPE)\n        return X_w\n\n    def unwarp(self, X_w, fixed_vals={}):\n        \"\"\"Inverse of :func:`.warp`.\n\n        Parameters\n        ----------\n        X_w : :class:`numpy:numpy.ndarray` of shape (n, m)\n            Warped version of input space. Must be 2D `float` :class:`numpy:numpy.ndarray`. `n` is the number of\n            separate points in the warped joint space. `m` is the size of the joint warped space, which can be inferred\n            in advance by calling :func:`.get_bounds`.\n        fixed_vals : dict\n            Subset of variables we want to keep fixed in X. Unwarp checks that the unwarped version of `X_w` matches\n            `fixed_vals` up to numerical error. Otherwise, an error is raised.\n\n        Returns\n        -------\n        X : list(dict(str, object)) of shape (n,)\n            List of `n` points in the joint space to warp. 
Each list element is a dictionary where each key corresponds\n            to a variable in the joint space.\n        \"\"\"\n        X_w = np.asarray(X_w)\n        check_array(X_w, \"X\", ndim=2, shape_endswith=(self.blocks[-1],), dtype=WARPED_DTYPE)\n        N = X_w.shape[0]\n\n        # Use snap_to to make sure we get exact value (no-round off) for cases where we know expected answer\n        X = {\n            param: snap_to(self.spaces[param].unwarp(xx), fixed_vals.get(param, None))\n            for param, xx in zip(self.param_list, np.hsplit(X_w, self.blocks[:-1]))\n        }\n        # Convert dict of arrays to list of dicts, this would not be needed if\n        # we used pandas but we do not want to add it as a dep. np.asscalar and\n        # .item() appear to be the same thing but asscalar seems more readable.\n        X = [{param: X[param][ii].item() for param in self.param_list} for ii in range(N)]\n        return X\n\n    def get_bounds(self):\n        \"\"\"Get bounds of the warped joint space.\n\n        Returns\n        -------\n        bounds : :class:`numpy:numpy.ndarray` of shape (m, 2)\n            Bounds in the warped space. 
First column is the lower bound and the second column is the upper bound.\n            ``bounds.tolist()`` gives the bounds in the standard form expected by scipy optimizers:\n            ``[(lower_1, upper_1), ..., (lower_n, upper_n)]``.\n        \"\"\"\n        bounds = np.concatenate([self.spaces[param].get_bounds() for param in self.param_list], axis=0)\n        check_array(bounds, \"bounds\", shape_endswith=(2,), dtype=WARPED_DTYPE)\n        return bounds\n\n    def grid(self, max_interp=N_GRID_DEFAULT):\n        \"\"\"Return grid spanning the original (unwarped) space.\n\n        Parameters\n        ----------\n        max_interp : int\n            The number of points to use in grid space when a range and not values are used to define the space.\n            Must be ``>= 0``.\n\n        Returns\n        -------\n        axes : dict(str, list)\n            Grids spanning the original spaces of each variable. For each variable, this is simply ``self.values``\n            if a grid has already been specified, otherwise it is just grid across the range.\n        \"\"\"\n        axes = {var_name: space.grid(max_interp=max_interp) for var_name, space in self.spaces.items()}\n        return axes\n"
  },
  {
    "path": "bayesmark/stats.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"General statistic tools useful in the benchmark.\n\"\"\"\nimport numpy as np\nimport scipy.stats as sst\n\n\ndef robust_standardize(X, q_level=0.5):\n    \"\"\"Perform robust standardization of data matrix `X` over axis 0.\n\n    Similar to :func:`sklearn:sklearn.preprocessing.robust_scale` except also does a Gaussian\n    adjustment rescaling so that if Gaussian data is passed in the transformed\n    data will, in large `n`, be distributed as N(0,1). See sklearn feature\n    request #10139 on github.\n\n    Parameters\n    ----------\n    X : :class:`numpy:numpy.ndarray` of shape (n, ...)\n        Array containing elements standardize. Require ``n >= 2``.\n    q_level : scalar\n        Must be in [0, 1]. 
Inter-quartile range to use for scale estimation.\n\n    Returns\n    -------\n    X : :class:`numpy:numpy.ndarray` of shape (n, ...)\n        Elements of input `X` standardization.\n    \"\"\"\n    X = np.asarray(X)\n    assert X.ndim in (1, 2)\n    assert np.all(np.isfinite(X))\n    assert 0.0 < q_level and q_level <= 1.0\n    assert X.shape[0] >= 2\n\n    mu = np.median(X, axis=0)\n\n    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)\n    v = np.percentile(X, 100 * q1, axis=0) - np.percentile(X, 100 * q0, axis=0)\n    v = np.asarray(v)\n    v[v == 0.0] = 1.0\n\n    X_ss = (X - mu) / v\n    # Rescale to match scale of N(0,1)\n    X_ss = X_ss * (sst.norm.ppf(q1) - sst.norm.ppf(q0))\n    assert X.shape == X_ss.shape\n    return X_ss\n\n\ndef t_EB(x, alpha=0.05, axis=-1):\n    \"\"\"Get t-statistic based error bars on mean of `x`.\n\n    Parameters\n    ----------\n    x : :class:`numpy:numpy.ndarray` of shape (n_samples,)\n        Data points to estimate mean. Must not be empty or contain ``NaN``.\n    alpha : float\n        The alpha level (``1-confidence``) probability (in (0, 1)) to construct confidence interval from t-statistic.\n    axis : int\n        The axis on `x` where we compute the t-statistics. The function is vectorized over all other dimensions.\n\n    Returns\n    -------\n    EB : float\n        Size of error bar on mean (``>= 0``). The confidence interval is ``[mean(x) - EB, mean(x) + EB]``. `EB` is\n        ``inf`` when ``len(x) <= 1``. 
Will be ``NaN`` if there are any infinite values in `x`.\n    \"\"\"\n    assert np.ndim(x) >= 1 and (not np.any(np.isnan(x)))\n    assert np.ndim(alpha) == 0\n    assert 0.0 < alpha and alpha < 1.0\n\n    N = np.shape(x)[axis]\n    if N <= 1:\n        return np.full(np.sum(x, axis=axis).shape, fill_value=np.inf)\n\n    confidence = 1 - alpha\n    # loc cancels out when we just want EB anyway\n    LB, UB = sst.t.interval(confidence, N - 1, loc=0.0, scale=1.0)\n    assert not (LB > UB)\n    # Just multiplying scale=ss.sem(x) is better for when scale=0\n    EB = 0.5 * sst.sem(x, axis=axis) * (UB - LB)\n    return EB\n"
  },
  {
    "path": "bayesmark/util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"General utilities that should arguably be included in Python.\n\"\"\"\nimport shlex\n\n\ndef in_or_none(x, L):\n    \"\"\"Check if item is in list of list is None.\"\"\"\n    return (L is None) or (x in L)\n\n\ndef all_unique(L):\n    \"\"\"Check if all elements in a list are unique.\n\n    Parameters\n    ----------\n    L : list\n        List we would like to check for uniqueness.\n\n    Returns\n    -------\n    uniq : bool\n        True if all elements in `L` are unique.\n    \"\"\"\n    uniq = len(L) == len(set(L))\n    return uniq\n\n\ndef strict_sorted(L):\n    \"\"\"Return a strictly sorted version of `L`. 
Therefore, this raises an error if `L` contains duplicates.\n\n    Parameters\n    ----------\n    L : list\n        List we would like to sort.\n\n    Returns\n    -------\n    S : list\n        Strictly sorted version of `L`.\n    \"\"\"\n    assert all_unique(L), \"Cannot strict sort because list contains duplicates.\"\n    S = sorted(L)\n    return S\n\n\ndef range_str(stop):\n    \"\"\"Version of ``range(stop)`` that also returns strings that are zero padded so the entire iteration is of the\n    same length.\n\n    Parameters\n    ----------\n    stop : int\n        Stop value equivalent to ``range(stop)``.\n\n    Yields\n    ------\n    x : int\n        The integer from ``range(stop)``.\n    ss : str\n        String representation of `x`, zero padded so all items from this generator have the same ``len(ss)``.\n    \"\"\"\n    str_len = len(str(stop - 1))  # moot if stop=0\n\n    def map_(x):\n        ss = str(x).zfill(str_len)\n        return x, ss\n\n    G = map(map_, range(stop))\n    return G\n\n\ndef str_join_safe(delim, str_vec, append=False):\n    \"\"\"Version of `str.join` that is guaranteed to be invertible.\n\n    Parameters\n    ----------\n    delim : str\n        Delimiter to join the strings.\n    str_vec : list(str)\n        List of strings to join. A `ValueError` is raised if `delim` is present in any of these strings.\n    append : bool\n        If true, assume the first element is already joined and we are appending to it. So, `str_vec[0]` can contain\n        `delim`.\n\n    Returns\n    -------\n    joined_str : str\n        Joined version of `str_vec`, which is always recoverable with ``joined_str.split(delim)``.\n\n    Examples\n    --------\n    Append is required because,\n\n    .. code-block:: pycon\n\n        ss = str_join_safe('_', ('foo', 'bar'))\n        str_join_safe('_', (ss, 'baz', 'qux'))\n\n    would fail because we are appending ``'baz'`` and ``'qux'`` to the already joined string ``ss = 'foo_bar'``.\n\n    In this case, we use\n\n    .. 
code-block:: pycon\n\n        ss = str_join_safe('_', ('foo', 'bar'))\n        str_join_safe('_', (ss, 'baz', 'qux'), append=True)\n    \"\"\"\n    chk_vec = str_vec[1:] if append else str_vec\n\n    for ss in chk_vec:\n        if delim in ss:\n            raise ValueError(\"%s cannot contain delimeter %s\" % (ss, delim))\n\n    joined_str = delim.join(str_vec)\n    return joined_str\n\n\ndef shell_join(argv, delim=\" \"):\n    \"\"\"Join strings together in a way that is an inverse of `shlex` shell parsing into `argv`.\n\n    Basically, if the resulting string is passed as a command line argument then `sys.argv` will equal `argv`.\n\n    Parameters\n    ----------\n    argv : list(str)\n        List of arguments to collect into command line string. It will be escaped accordingly.\n    delim : str\n        Whitespace delimiter to join the strings.\n\n    Returns\n    -------\n    cmd : str\n        Properly escaped and joined command line string.\n    \"\"\"\n    vv = [shlex.quote(vv) for vv in argv]\n    cmd = delim.join(vv)\n    assert shlex.split(cmd) == list(argv)\n    return cmd\n\n\ndef chomp(str_val, ext=\"\\n\"):\n    \"\"\"Chomp a suffix off a string.\n\n    Parameters\n    ----------\n    str_val : str\n        String we want to chomp off a suffix, e.g., ``\"foo.log\"``, and we want to chomp the file extension.\n    ext : str\n        The suffix we want to chomp. An error is raised if `str_val` doesn't end in `ext`.\n\n    Returns\n    -------\n    chomped : str\n        Version of `str_val` with `ext` removed from the end.\n    \"\"\"\n    n = len(ext)\n    assert n > 0\n\n    chomped, ext_ = str_val[:-n], str_val[-n:]\n    assert ext == ext_, \"%s must end with %s\" % (repr(str_val), repr(ext))\n    return chomped\n\n\ndef preimage_func(f, x):\n    \"\"\"Pre-image a function at a set of input points.\n\n    Parameters\n    ----------\n    f : typing.Callable\n        The function we would like to pre-image. 
The output type must be hashable.\n    x : typing.Iterable\n        Input points we would like to evaluate `f`. `x` must be of a type acceptable by `f`.\n\n    Returns\n    -------\n    D : dict(object, list(object))\n        This dictionary maps the output of `f` to the list of `x` values that produce it.\n    \"\"\"\n    D = {}\n    for xx in x:\n        D.setdefault(f(xx), []).append(xx)\n    return D\n"
  },
  {
    "path": "bayesmark/xr_util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"General utilities for `xarray` that should be included in `xarray`.\n\"\"\"\nfrom collections import OrderedDict\n\nimport numpy as np\nimport pandas as pd\nimport xarray as xr\n\nfrom bayesmark.util import all_unique\n\n\ndef is_simple_coords(coords, min_side=0, dims=None):\n    \"\"\"Check if all xr coordinates are \"simple\". That is, equals to ``np.arange(n)``.\n\n    Parameters\n    ----------\n    coords : dict-like of coordinates\n        The coordinates we would like to check, e.g. from ``DataArray.coords``.\n    min_side : int\n        The minimum side requirement. We can set this ``min_side=1`` and have empty coordinates result in a return\n        value of ``False``.\n    dims : None or list of dimension names\n        Dimensions we want to check for simplicity. 
If ``None``, check all dimensions.\n\n    Returns\n    -------\n    simple : bool\n        True when all coordinates are simple.\n    \"\"\"\n    for kk in coords:\n        if (dims is None) or (kk in dims):\n            C = coords[kk].values\n            # Not checking dtype on empty coords, could check that too if we want to be strict\n            if len(C) > 0 and C.dtype != np.int_:\n                return False\n\n            C = C.tolist()\n            if len(C) < min_side:\n                return False\n            if C != list(range(len(C))):\n                return False\n    return True\n\n\ndef ds_like(ref, vars_, dims, fill=np.nan):\n    \"\"\"Produce a blank :class:`xarray:xarray.Dataset` copying some coordinates from another\n    :class:`xarray:xarray.Dataset`.\n\n    Parameters\n    ----------\n    ref : :class:`xarray:xarray.Dataset`\n        The reference dataset we want to copy coordinates from.\n    vars_ : typing.Iterable\n        List of variable names we want in the new dataset.\n    dims : list\n        List of dimensions we want to copy over from `ref`. These are the dimensions of the output.\n    fill : scalar\n        Scalar value to fill the blank dataset. 
The `dtype` will be determined from the `fill` value.\n\n    Returns\n    -------\n    ds : :class:`xarray:xarray.Dataset`\n        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.\n        All values are filled with `fill`.\n    \"\"\"\n    size = [ref.sizes[dd] for dd in dims]\n\n    # Use OrderedDict for good measure, probably not needed\n    data = OrderedDict([(vv, (dims, np.full(size, fill))) for vv in vars_])\n    coords = OrderedDict([(dd, ref.coords[dd].values) for dd in dims])\n    ds = xr.Dataset(data, coords=coords)\n    return ds\n\n\ndef ds_like_mixed(ref, vars_, dims, fill=np.nan):\n    \"\"\"The same as `ds_like` but allow different dimensions for each variable.\n\n    Parameters\n    ----------\n    ref : :class:`xarray:xarray.Dataset`\n        The reference dataset we want to copy coordinates from.\n    vars_ : typing.Iterable\n        List of (variable names, dimension) pairs we want in the new dataset. The dimensions for each variable must be\n        a subset of `dims`.\n    dims : list\n        List of all dimensions we want to copy over from `ref`.\n    fill : scalar\n        Scalar value to fill the blank dataset. The `dtype` will be determined from the `fill` value.\n\n    Returns\n    -------\n    ds : :class:`xarray:xarray.Dataset`\n        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.\n        All values are filled with `fill`.\n    \"\"\"\n    coords = OrderedDict([(dd, ref.coords[dd].values) for dd in dims])\n\n    data = OrderedDict()\n    for var_name, var_dims in vars_:\n        assert set(var_dims).issubset(dims)\n        size = [ref.sizes[dd] for dd in var_dims]\n        data[var_name] = (var_dims, np.full(size, np.nan))\n    ds = xr.Dataset(data, coords=coords)\n    return ds\n\n\ndef only_dataarray(ds):\n    \"\"\"Convert a :class:`xarray:xarray.Dataset` to a :class:`xarray:xarray.DataArray`. 
If the\n    :class:`xarray:xarray.Dataset` has more than one variable, an error is raised.\n\n    Parameters\n    ----------\n    ds : :class:`xarray:xarray.Dataset`\n        :class:`xarray:xarray.Dataset` we would like to convert to a :class:`xarray:xarray.DataArray`. This must\n        contain only one variable.\n\n    Returns\n    -------\n    da : :class:`xarray:xarray.DataArray`\n        The :class:`xarray:xarray.DataArray` extracted from `ds`.\n    \"\"\"\n    name, = ds\n    da = ds[name]\n    return da\n\n\ndef coord_compat(da_seq, dims):\n    \"\"\"Check if a sequence of :class:`xarray:xarray.DataArray` have compatible coordinates.\n\n    Parameters\n    ----------\n    da_seq : list(:class:`xarray:xarray.DataArray`)\n        Sequence of :class:`xarray:xarray.DataArray` we would like to check for compatibility.\n        :class:`xarray:xarray.Dataset` work too.\n    dims : list\n        Subset of all dimensions in the :class:`xarray:xarray.DataArray` we are concerned with for compatibility.\n\n    Returns\n    -------\n    compat : bool\n        True if all the :class:`xarray:xarray.DataArray` have compatible coordinates.\n    \"\"\"\n    if len(da_seq) <= 1:\n        return True\n\n    ref = da_seq[0]\n    for da in da_seq:\n        # There is probably a better way to do this by attempting concat in try-except, but good enough for now:\n        for dd in dims:\n            assert dd in da.coords, \"dim %s missing in dataarray\" % dd\n            if not np.all(ref.coords[dd].values == da.coords[dd].values):\n                return False\n    return True\n\n\ndef da_to_string(da):\n    \"\"\"Generate a human readable version of a 1D :class:`xarray:xarray.DataArray`.\n\n    Parameters\n    ----------\n    da : :class:`xarray:xarray.DataArray`\n        The :class:`xarray:xarray.DataArray` to display. 
Must only have one dimension.\n\n    Returns\n    -------\n    str_val : str\n        String with human readable version of `da`.\n    \"\"\"\n    assert len(da.dims) == 1\n    str_val = da.to_series().to_string()\n    return str_val\n\n\ndef da_concat(da_dict, dims):\n    \"\"\"Concatenate a dictionary of :class:`xarray:xarray.DataArray` similar to :func:`pandas:pandas.concat`.\n\n    Parameters\n    ----------\n    da_dict : dict(tuple(str), :class:`xarray:xarray.DataArray`)\n        Dictionary of :class:`xarray:xarray.DataArray` to combine. The keys are tuples of index values. The\n        :class:`xarray:xarray.DataArray` must have compatible coordinates.\n    dims : list(str)\n        The names of the new dimensions we create for the dictionary keys. This must be of the same length as the\n        key tuples in `da_dict`.\n\n    Returns\n    -------\n    da : :class:`xarray:xarray.DataArray`\n        Combined data array. The new dimensions will be ``input_da.dims + dims``.\n    \"\"\"\n    assert len(da_dict) > 0\n    assert all(len(da.dims) > 0 for da in da_dict.values()), \"0-dimensional DataArray not supported\"\n    assert all_unique(dims)\n\n    cur_dims = list(da_dict.values())[0].dims\n    assert all(da.dims == cur_dims for da in da_dict.values())\n    assert len(set(cur_dims) & set(dims)) == 0\n\n    def squeeze(tt):\n        if len(tt) == 1:\n            return tt[0]\n        return tt\n\n    D = OrderedDict([(squeeze(kk), da.to_series()) for kk, da in da_dict.items()])\n    df = pd.concat(D, axis=1)\n\n    assert df.columns.nlevels == len(dims)\n    df.columns.names = dims\n\n    df = df.stack(level=list(range(df.columns.nlevels)))\n    assert isinstance(df, pd.Series)\n    da = df.to_xarray()\n    assert isinstance(da, xr.DataArray)\n    return da\n\n\ndef ds_concat(ds_dict, dims):\n    \"\"\"Concatenate a dictionary of :class:`xarray:xarray.Dataset` similar to :func:`pandas:pandas.concat`, and a\n    generalization of :func:`.da_concat`.\n\n    
Parameters\n    ----------\n    ds_dict : dict(tuple(str), :class:`xarray:xarray.DataArray`)\n        Dictionary of :class:`xarray:xarray.Dataset` to combine. The keys are tuples of index values. The\n        :class:`xarray:xarray.Dataset` must have compatible coordinates, and all have the same variables.\n    dims : list(str)\n        The names of the new dimensions we create for the dictionary keys. This must be of the same length as the\n        key tuples in `ds_dict`.\n\n    Returns\n    -------\n    ds : :class:`xarray:xarray.Dataset`\n        Combined dataset. For each variable `var`, the new dimensions will be ``input_ds[var].dims + dims``.\n    \"\"\"\n    assert len(ds_dict) > 0\n    assert len(dims) > 0\n    assert all(len(kk) == len(dims) for kk in ds_dict)\n\n    # Get an arbitrary element as the reference\n    k0 = list(ds_dict.keys())[0]\n\n    # Check all vars the same\n    vars_, = set([tuple(ds) for ds in ds_dict.values()])\n\n    # Now combine da for each variable, one at a time\n    ds = xr.Dataset(coords=ds_dict[k0].coords)\n    for vv in vars_:\n        da_dict = OrderedDict([(kk, da[vv]) for kk, da in ds_dict.items()])\n        ds[vv] = da_concat(da_dict, dims)\n\n    return ds\n"
  },
  {
    "path": "build_wheel.sh",
    "content": "#!/bin/bash\n\nset -ex\nset -o pipefail\n\n# Display what version is being used for logging\npython --version\n\n# Fail if untracked files so we don't delete them in next step\ntest -z \"$(git status --porcelain)\"\n\n# Build from clean repo, delete all ignored files\ngit clean -x -f -d\n\n# Get everything in place to put inside the wheel\nSHA_LONG=$(git rev-parse HEAD)\necho VERSION=\\\"$SHA_LONG\\\" >bayesmark/version.py\n\n# Now the actual build\npython3 setup.py sdist\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "_build\n"
  },
  {
    "path": "docs/Makefile",
    "content": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nPAPER         =\nBUILDDIR      = _build\n\n# User-friendly check for sphinx-build\nifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)\n$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)\nendif\n\n# Internal variables.\nPAPEROPT_a4     = -D latex_paper_size=a4\nPAPEROPT_letter = -D latex_paper_size=letter\nALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n# the i18n builder cannot share the environment and doctrees with the others\nI18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n\n.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck all\n\nhelp:\n\t@echo \"Please use \\`make <target>' where <target> is one of\"\n\t@echo \"  html       to make standalone HTML files\"\n\t@echo \"  dirhtml    to make HTML files named index.html in directories\"\n\t@echo \"  singlehtml to make a single large HTML file\"\n\t@echo \"  pickle     to make pickle files\"\n\t@echo \"  json       to make JSON files\"\n\t@echo \"  htmlhelp   to make HTML files and a HTML help project\"\n\t@echo \"  qthelp     to make HTML files and a qthelp project\"\n\t@echo \"  devhelp    to make HTML files and a Devhelp project\"\n\t@echo \"  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter\"\n\t@echo \"  latexpdf   to make LaTeX files and run them through pdflatex\"\n\t@echo \"  text       to make text files\"\n\t@echo \"  man        to make manual pages\"\n\t@echo \"  texinfo    to make 
Texinfo files\"\n\t@echo \"  info       to make Texinfo files and run them through makeinfo\"\n\t@echo \"  gettext    to make PO message catalogs\"\n\t@echo \"  changes    to make an overview of all changed/added/deprecated items\"\n\t@echo \"  xml        to make Docutils-native XML files\"\n\t@echo \"  pseudoxml  to make pseudoxml-XML files for display purposes\"\n\t@echo \"  linkcheck  to check all external links for integrity\"\n\nclean:\n\trm -rf $(BUILDDIR)/*\n\nhtml:\n\t$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/html.\"\n\ndirhtml:\n\t$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/dirhtml.\"\n\nsinglehtml:\n\t$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml\n\t@echo\n\t@echo \"Build finished. The HTML page is in $(BUILDDIR)/singlehtml.\"\n\npickle:\n\t$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle\n\t@echo\n\t@echo \"Build finished; now you can process the pickle files.\"\n\njson:\n\t$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json\n\t@echo\n\t@echo \"Build finished; now you can process the JSON files.\"\n\nhtmlhelp:\n\t$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp\n\t@echo\n\t@echo \"Build finished; now you can run HTML Help Workshop with the\" \\\n\t      \".hhp project file in $(BUILDDIR)/htmlhelp.\"\n\nqthelp:\n\t$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp\n\t@echo\n\t@echo \"Build finished; now you can run \"qcollectiongenerator\" with the\" \\\n\t      \".qhcp project file in $(BUILDDIR)/qthelp, like this:\"\n\t@echo \"# qcollectiongenerator $(BUILDDIR)/qthelp/bayesmark.qhcp\"\n\t@echo \"To view the help file:\"\n\t@echo \"# assistant -collectionFile $(BUILDDIR)/qthelp/bayesmark.qhc\"\n\ndevhelp:\n\t$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp\n\t@echo\n\t@echo \"Build finished.\"\n\t@echo 
\"To view the help file:\"\n\t@echo \"# mkdir -p $$HOME/.local/share/devhelp/bayesmark\"\n\t@echo \"# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bayesmark\"\n\t@echo \"# devhelp\"\n\nlatex:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo\n\t@echo \"Build finished; the LaTeX files are in $(BUILDDIR)/latex.\"\n\t@echo \"Run \\`make' in that directory to run these through (pdf)latex\" \\\n\t      \"(use \\`make latexpdf' here to do that automatically).\"\n\nlatexpdf:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo \"Running LaTeX files through pdflatex...\"\n\t$(MAKE) -C $(BUILDDIR)/latex all-pdf\n\t@echo \"pdflatex finished; the PDF files are in $(BUILDDIR)/latex.\"\n\ntext:\n\t$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text\n\t@echo\n\t@echo \"Build finished. The text files are in $(BUILDDIR)/text.\"\n\nman:\n\t$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man\n\t@echo\n\t@echo \"Build finished. The manual pages are in $(BUILDDIR)/man.\"\n\ntexinfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo\n\t@echo \"Build finished. The Texinfo files are in $(BUILDDIR)/texinfo.\"\n\t@echo \"Run \\`make' in that directory to run these through makeinfo\" \\\n\t      \"(use \\`make info' here to do that automatically).\"\n\ninfo:\n\t$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo\n\t@echo \"Running Texinfo files through makeinfo...\"\n\tmake -C $(BUILDDIR)/texinfo info\n\t@echo \"makeinfo finished; the Info files are in $(BUILDDIR)/texinfo.\"\n\ngettext:\n\t$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale\n\t@echo\n\t@echo \"Build finished. 
The message catalogs are in $(BUILDDIR)/locale.\"\n\nchanges:\n\t$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes\n\t@echo\n\t@echo \"The overview file is in $(BUILDDIR)/changes.\"\n\nlinkcheck:\n\t$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck\n\t@echo\n\t@echo \"Link check complete; look for any errors in the above output \" \\\n\t      \"or in $(BUILDDIR)/linkcheck/output.txt.\"\n\nxml:\n\t$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml\n\t@echo\n\t@echo \"Build finished. The XML files are in $(BUILDDIR)/xml.\"\n\npseudoxml:\n\t$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml\n\t@echo\n\t@echo \"Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml.\"\n\nall: html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck\n"
  },
  {
    "path": "docs/authors.rst",
    "content": "-------\nCredits\n-------\n\n~~~~~~~~~~~~~~~~\nDevelopment lead\n~~~~~~~~~~~~~~~~\n\nRyan Turner (rdturnermtl)\n\n~~~~~~~~~~~~\nContributors\n~~~~~~~~~~~~\n\n* David Eriksson (dme65)\n"
  },
  {
    "path": "docs/code.rst",
    "content": "-------------\nCode Overview\n-------------\n\n.. _bayesmark:\n\n~~~~\nData\n~~~~\n\n.. automodule:: bayesmark.data\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~~~~~~~~~~~~~\nExpected Max Estimation\n~~~~~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.expected_max\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~~~~~~~~~~~~\nExperiment Aggregation\n~~~~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.experiment_aggregate\n   :members:\n   :exclude-members: main\n\n~~~~~~~~~~~~~~~~~~~\nExperiment Analysis\n~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.experiment_analysis\n   :members:\n   :exclude-members: main\n\n~~~~~~~~~~~~~~~~~~~\nExperiment Baseline\n~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.experiment_baseline\n   :members:\n   :exclude-members: main, do_baseline\n\n~~~~~~~~~~~~~~~~~~~\nExperiment Launcher\n~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.experiment_launcher\n   :members:\n   :exclude-members: main\n\n~~~~~~~~~~\nExperiment\n~~~~~~~~~~\n\n.. automodule:: bayesmark.experiment\n   :members:\n   :exclude-members: experiment_main\n\n~~~~~~~~~~~~~~~~~~~\nFunction Signatures\n~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.signatures\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~\nNumpy Util\n~~~~~~~~~~\n\n.. automodule:: bayesmark.np_util\n   :members:\n   :exclude-members:\n\n~~~~~~~~~\nPath Util\n~~~~~~~~~\n\n.. automodule:: bayesmark.path_util\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~~~~~~~~~\nQuantile Estimation\n~~~~~~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.quantiles\n   :members:\n   :exclude-members: ensure_shape\n\n~~~~~~~~~~~~~\nRandom Search\n~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.random_search\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~~~\nSerialization\n~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.serialize\n   :members:\n   :exclude-members: Serializer\n\n~~~~~~~~~~~~~~\nSklearn Tuning\n~~~~~~~~~~~~~~\n\n.. 
automodule:: bayesmark.sklearn_funcs\n   :members:\n   :exclude-members:\n\n~~~~~\nSpace\n~~~~~\n\n.. automodule:: bayesmark.space\n   :members:\n   :exclude-members: check_array, unravel_index\n\n~~~~~\nStats\n~~~~~\n\n.. automodule:: bayesmark.stats\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~~~~\nUtil (General)\n~~~~~~~~~~~~~~\n\n.. automodule:: bayesmark.util\n   :members:\n   :exclude-members:\n\n~~~~~~~~~~~\nXarray Util\n~~~~~~~~~~~\n\n.. automodule:: bayesmark.xr_util\n   :members:\n   :exclude-members:\n"
  },
  {
    "path": "docs/conf.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n#\n# bayesmark documentation build configuration file.\n#\n# This file is execfile()d with the current directory set to its\n# containing dir.\n#\n# Note that not all possible configuration values are present in this\n# autogenerated file.\n#\n# All configuration values have a default; values that are commented out\n# serve to show the default.\n\nimport os\nimport sys\n\n# If extensions (or modules to document with autodoc) are in another\n# directory, add these directories to sys.path here. If the directory is\n# relative to the documentation root, use os.path.abspath to make it\n# absolute, like shown here.\nsys.path.insert(0, os.path.abspath(\"..\"))\nsys.path.append(os.path.join(os.path.dirname(__file__), \"..\"))\n\n# Get the project root dir, which is the parent dir of this\ncwd = os.getcwd()\nproject_root = os.path.dirname(cwd)\n\n# Insert the project root dir as the first element in the PYTHONPATH.\n# This lets us ensure that the source package is imported, and that its\n# version is used.\nsys.path.insert(0, project_root)\n\n# -- General configuration ---------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n# needs_sphinx = '1.0'\n\n# Add any Sphinx extension module names here, as strings. 
They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.\nextensions = [\"sphinx.ext.autodoc\", \"sphinx.ext.viewcode\", \"sphinx.ext.intersphinx\", \"sphinx.ext.napoleon\"]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = [\"_templates\"]\n\n# The suffix of source filenames.\nsource_suffix = {\".rst\": \"restructuredtext\", \".txt\": \"markdown\", \".md\": \"markdown\"}\n\n# The encoding of source files.\n# source_encoding = 'utf-8-sig'\n\n# The master toctree document.\nmaster_doc = \"index\"\n\n# General information about the project.\nproject = \"bayesmark\"\ncopyright = \"2018-2019\"\n\n# The version info for the project you're documenting, acts as replacement\n# for |version| and |release|, also used in various other places throughout\n# the built documents.\n#\n# The short X.Y version.\n# version = bayesmark.__version__\n# The full version, including alpha/beta/rc tags.\n# release = bayesmark.__version__\n\n# The language for content autogenerated by Sphinx. Refer to documentation\n# for a list of supported languages.\n# language = None\n\n# There are two options for replacing |today|: either, you set today to\n# some non-false value, then it is used:\n# today = ''\n# Else, today_fmt is used as the format for a strftime call.\n# today_fmt = '%B %d, %Y'\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\nexclude_patterns = [\"_build\"]\n\n# The reST default role (used for this markup: `text`) to use for all\n# documents.\n# default_role = None\n\n# If true, '()' will be appended to :func: etc. cross-reference text.\n# add_function_parentheses = True\n\n# If true, the current module name will be prepended to all description\n# unit titles (such as .. function::).\n# add_module_names = True\n\n# If true, sectionauthor and moduleauthor directives will be shown in the\n# output. 
They are ignored by default.\n# show_authors = False\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = \"sphinx\"\n\n# A list of ignored prefixes for module index sorting.\n# modindex_common_prefix = []\n\n# If true, keep warnings as \"system message\" paragraphs in the built\n# documents.\n# keep_warnings = False\n\nintersphinx_mapping = {\n    \"python\": (\"https://docs.python.org/3/\", None),\n    \"numpy\": (\"https://docs.scipy.org/doc/numpy-1.16.1/\", None),\n    \"pandas\": (\"https://pandas.pydata.org/pandas-docs/stable/\", None),\n    \"xarray\": (\"http://xarray.pydata.org/en/stable/\", None),\n    \"sklearn\": (\"https://scikit-learn.org/stable/\", None),\n}\n\n# -- Options for HTML output -------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\nhtml_theme = \"default\"\n\n# Theme options are theme-specific and customize the look and feel of a\n# theme further.  For a list of options available for each theme, see the\n# documentation.\n# html_theme_options = {}\n\n# Add any paths that contain custom themes here, relative to this directory.\n# html_theme_path = []\n\n# The name for this set of Sphinx documents.  If None, it defaults to\n# \"<project> v<release> documentation\".\n# html_title = None\n\n# A shorter title for the navigation bar.  Default is the same as\n# html_title.\n# html_short_title = None\n\n# The name of an image file (relative to this directory) to place at the\n# top of the sidebar.\n# html_logo = None\n\n# The name of an image file (within the static path) to use as favicon\n# of the docs.  This file should be a Windows icon file (.ico) being\n# 16x16 or 32x32 pixels large.\n# html_favicon = None\n\n# Add any paths that contain custom static files (such as style sheets)\n# here, relative to this directory. 
They are copied after the builtin\n# static files, so a file named \"default.css\" will overwrite the builtin\n# \"default.css\".\nhtml_static_path = []\n\n# If not '', a 'Last updated on:' timestamp is inserted at every page\n# bottom, using the given strftime format.\n# html_last_updated_fmt = '%b %d, %Y'\n\n# If true, SmartyPants will be used to convert quotes and dashes to\n# typographically correct entities.\n# html_use_smartypants = True\n\n# Custom sidebar templates, maps document names to template names.\n# html_sidebars = {}\n\n# Additional templates that should be rendered to pages, maps page names\n# to template names.\n# html_additional_pages = {}\n\n# If false, no module index is generated.\n# html_domain_indices = True\n\n# If false, no index is generated.\n# html_use_index = True\n\n# If true, the index is split into individual pages for each letter.\n# html_split_index = False\n\n# If true, links to the reST sources are added to the pages.\n# html_show_sourcelink = True\n\n# If true, \"Created using Sphinx\" is shown in the HTML footer.\n# Default is True.\n# html_show_sphinx = True\n\n# If true, \"(C) Copyright ...\" is shown in the HTML footer.\n# Default is True.\n# html_show_copyright = True\n\n# If true, an OpenSearch description file will be output, and all pages\n# will contain a <link> tag referring to it.  The value of this option\n# must be the base URL from which the finished HTML is served.\n# html_use_opensearch = ''\n\n# This is the file name suffix for HTML files (e.g. 
\".xhtml\").\n# html_file_suffix = None\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = \"bayesmark_doc\"\n\n\n# -- Options for LaTeX output ------------------------------------------\n\nlatex_elements = {\n    # The paper size ('letterpaper' or 'a4paper').\n    #'papersize': 'letterpaper',\n    # The font size ('10pt', '11pt' or '12pt').\n    #'pointsize': '10pt',\n    # Additional stuff for the LaTeX preamble.\n    #'preamble': '',\n}\n\n# Grouping the document tree into LaTeX files. List of tuples\n# (source start file, target name, title, author, documentclass\n# [howto/manual]).\nlatex_documents = [(\"index\", \"bayesmark.tex\", \"BO Benchmark Documentation\", \"Uber AI Labs\", \"manual\")]\n\n# The name of an image file (relative to this directory) to place at\n# the top of the title page.\n# latex_logo = None\n\n# For \"manual\" documents, if this is true, then toplevel headings\n# are parts, not chapters.\n# latex_use_parts = False\n\n# If true, show page references after internal links.\n# latex_show_pagerefs = False\n\n# If true, show URL addresses after external links.\n# latex_show_urls = False\n\n# Documents to append as an appendix to all manuals.\n# latex_appendices = []\n\n# If false, no module index is generated.\n# latex_domain_indices = True\n\n\n# -- Options for manual page output ------------------------------------\n\n# One entry per manual page. List of tuples\n# (source start file, name, description, authors, manual section).\nman_pages = [(\"index\", \"bayesmark\", \"bayesmark Documentation\", [\"Uber AI Labs\"], 1)]\n\n# If true, show URL addresses after external links.\n# man_show_urls = False\n\n\n# -- Options for Texinfo output ----------------------------------------\n\n# Grouping the document tree into Texinfo files. 
List of tuples\n# (source start file, target name, title, author,\n#  dir menu entry, description, category)\ntexinfo_documents = [\n    (\n        \"index\",\n        \"bayesmark\",\n        \"bayesmark Documentation\",\n        \"Uber AI Labs\",\n        \"bayesmark\",\n        \"Benchmark of Bayesian optimization packages on real problems.\",\n        \"Miscellaneous\",\n    )\n]\n\n# Documents to append as an appendix to all manuals.\n# texinfo_appendices = []\n\n# If false, no module index is generated.\n# texinfo_domain_indices = True\n\n# How to display URL addresses: 'footnote', 'no', or 'inline'.\n# texinfo_show_urls = 'footnote'\n\n# If true, do not generate a @detailmenu in the \"Top\" node's menu.\n# texinfo_no_detailmenu = False\n"
  },
  {
    "path": "docs/dummy.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport sphinx\n\n# import extra deps and use it to keep pipreqs and flake8 happy\nfor pkg in (sphinx,):\n    print(\"%s %s\" % (pkg.__name__, pkg.__version__))\n"
  },
  {
    "path": "docs/index.rst",
    "content": ".. bayesmark documentation master file, created by\n   sphinx-quickstart on Tue Jul  9 22:26:36 2013.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\nWelcome to the Bayes Opt Benchmark Documentation\n================================================\n\nContents:\n\n.. toctree::\n   :maxdepth: 2\n\n   readme\n   scoring\n   code\n   authors\n"
  },
  {
    "path": "docs/readme.rst",
    "content": ".. include:: ../README.rst\n"
  },
  {
    "path": "docs/scoring.rst",
    "content": ".. _how-scoring-works:\n\nHow scoring works\n=================\n\nThe scoring system is about aggregating the function evaluations of the optimizers. We represent :math:`F_{pmtn}` as the function evaluation of objective function :math:`p` (``TEST_CASE``) from the suggestion of method :math:`m` (``METHOD``) at batch :math:`t` (``ITER``) under repeated trial :math:`n` (``TRIAL``). In the case of batch sizes greater than 1, :math:`F_{pmtn}` is the minimum function evaluation across the suggestions in batch :math:`t`. The first transformation is that we consider the *cumulative minimum* over batches :math:`t` as the performance of the optimizer on a particular trial:\n\n.. math::\n\n   S_{pmtn} = \\textrm{cumm-min}_t F_{pmtn}\\,.\n\nAll of the aggregate quantities described here are computed by :func:`.experiment_analysis.compute_aggregates` (which is called by `bayesmark-anal <#analyze-and-summarize-results>`_) in either the ``agg_result`` or ``summary`` xarray datasets. Additionally, the baseline performances are in the xarray dataset ``baseline_ds`` from :func:`.experiment_baseline.compute_baseline`. The baseline dataset can be generated via the ``bayesmark-baseline`` command, but it is called automatically by ``bayesmark-anal`` if needed.\n\nMedian scores\n-------------\n\nThe more robust, but less decision-theoretically appealing method for aggregation is to look at median scores. On a per problem basis we simply consider the median (``agg_result[PERF_MED]``):\n\n.. math::\n\n   \\textrm{med-perf}_{pmt} = \\textrm{median}_n \\, S_{pmtn} \\,.\n\nHowever, this score is not very comparable across different problems as the objectives are all on different scales with possible different units. Therefore, we decide the *normalized score* (``agg_result[NORMED_MED]``) in a way that is *invariant* to linear transformation of the objective function:\n\n.. 
math::\n\n   \\textrm{norm-med-perf}_{pmt} = \\frac{\\textrm{med-perf}_{pmt}  - \\textrm{opt}_p}{\\textrm{rand-med-perf}_{pt} - \\textrm{opt}_p} \\,,\n\nwhere :math:`\\textrm{opt}_p` (``baseline_ds[PERF_BEST]``) is an estimate of the global minimum of objective function :math:`p`; and :math:`\\textrm{rand-med-perf}_{pt}` is the median performance of random search at batch :math:`t` on objective function :math:`p`. This means that, on any objective, an optimizer has score 0 after converging to the global minimum; and random search performs as a straight line at 1 for all :math:`t`. Conceptually, the median random search performance (``baseline_ds[PERF_MED]``) is computed as:\n\n.. math::\n\n   \\textrm{rand-med-perf}_{pt} = \\textrm{median}_n \\, S_{pmtn} \\,,\n\nwith :math:`m=` random search. However, every observation of :math:`F_{pmtn}` is iid in the case of random search. There is no reason to break the samples apart into trials :math:`n`. Instead, we use the function :func:`.quantiles.min_quantile_CI` to compute a more statistically efficient pooled estimator using the pooled random search samples over :math:`t` and :math:`n`. This pooled method is a nonparametric estimator of the quantiles of the minimum over a batch of samples, which is distribution free.\n\nTo further aggregate the performance over all objectives for a single optimizer we can consider the median-of-medians (``summary[PERF_MED]``):\n\n.. math::\n\n   \\textrm{med-perf}_{mt} = \\textrm{median}_p \\, \\textrm{norm-med-perf}_{pmt} \\,.\n\nCombining scores across different problems is sensible here because we have transformed them all onto the same scale.\n\nMean scores\n-----------\n\nFrom a decision theoretical perspective it is more sensible to consider the mean (possibly warped) score. The median score can hide a high percentage of runs that completely fail. However, when we look at the mean score we first take the clipped score with a baseline value:\n\n.. 
math::\n\n   S'_{pmtn} = \\min(S_{pmtn}, \\textrm{clip}_p) \\,.\n\nThis is largely because there may be a non-zero probability of :math:`F = \\infty` (as in when the objective function crashes), which means that mean random search performance is infinite loss. We set :math:`\\textrm{clip}_p` (``baseline_ds[PERF_CLIP]``) to the median score after a single function evaluation, which is :math:`\\textrm{rand-med-perf}_{p0}` for a batch size of 1. The mean performance on a single problem (``agg_result[PERF_MEAN]``) then becomes:\n\n.. math::\n\n   \\textrm{mean-perf}_{pmt} = \\textrm{mean}_n \\, S'_{pmtn} \\,.\n\nWhich then becomes a normalized performance (``agg_result[NORMED_MEAN]``) of:\n\n.. math::\n\n   \\textrm{norm-mean-perf}_{pmt} = \\frac{\\textrm{mean-perf}_{pmt}  - \\textrm{opt}_p}{\\textrm{clip}_p  - \\textrm{opt}_p} \\,.\n\nNote here that the random search performance is only 1 at the first batch unlike for :math:`\\textrm{norm-med-perf}_{pmt}`.\n\nAgain we can aggregate this into all objective function performance with (``summary[PERF_MEAN]``):\n\n.. math::\n\n   \\textrm{mean-perf}_{mt} = \\textrm{mean}_p \\, \\textrm{norm-mean-perf}_{pmt} \\,,\n\nwhich is a mean-of-means (or *grand mean*), which is much more sensible in general than a median-of-medians. We can again obtain the property of random search having a constant performance of 1 for all :math:`t` using (``summary[NORMED_MEAN]``):\n\n.. math::\n\n   \\textrm{norm-mean-perf}_{mt} = \\frac{\\textrm{mean-perf}_{mt}}{\\textrm{rand-mean-perf}_{t}} \\,,\n\nwhere the random search baseline has been determined with the same sequence of equations as the other methods. These all collapse down to:\n\n.. math::\n\n   \\textrm{rand-mean-perf}_{t} = \\textrm{mean}_p \\, \\frac{\\textrm{rand-mean-perf}_{pt} - \\textrm{opt}_p}{\\textrm{clip}_p  - \\textrm{opt}_p} \\,.\n\nConceptually, we compute this random search baseline (``baseline_ds[PERF_MEAN]``) as:\n\n.. 
math::\n\n   \\textrm{rand-mean-perf}_{pt} = \\textrm{mean}_n \\, S'_{pmtn} \\,,\n\nwith :math:`m=` random search. However, because all function evaluations for random search are iid across :math:`t`, we can use a more statistically efficient pooled estimator :func:`.expected_max.expected_min`, which is an unbiased distribution free estimator on the expected minimum of :math:`m` samples from a distribution.\n\nNote that :math:`\\textrm{norm-mean-perf}_{mt}` is, in aggregate, a linear transformation on the expected loss :math:`S'`. This makes it more justified in a decision theory framework than the median score. However, to view it as a linear transformation we are considering the values in ``baseline_ds`` to be fixed reference loss values and not the output from the experiment.\n\nError bars\n----------\n\nThe datasets ``agg_result`` and ``summary`` also compute error bars in the form of ``LB_`` and ``UB_`` variables. These error bars do not consider the random variation in the baseline quantities from ``baseline_ds`` like ``opt`` and ``clip``. They are instead treated as fixed constant reference points. Therefore, they are computed by a different command ``bayesmark-baseline``. The user can generate the baselines when they want, but since they are not considered a random quantity in the statistics they are not automatically generated from the experimental data (unless the baseline file ``derived/baseline.json`` is missing).\n\nAdditionally, the error bars on the grand mean (``summary[PERF_MEAN]``) are computed by simply using t-statistic based error bars on the individual means. Under a \"random effects\" model, this does not actually lose any statistical power. However, this is computing the mean on the loss over sampling from new problems under the \"same distribution\" of benchmark problems. These error bars will be wider than if we computed the error bars on the grand mean over this particular set of benchmark problems.\n"
  },
  {
    "path": "example_opt_root/config.json",
    "content": "{\n    \"Flaky\": [\n        \"flaky_optimizer.py\",\n        {}\n    ],\n    \"HyperOpt-New\": [\n        \"hyperopt_optimizer.py\",\n        {}\n    ],\n    \"Nevergrad-OnePlusOne-New\": [\n        \"nevergrad_optimizer.py\",\n        {\n            \"budget\": 300,\n            \"tool\": \"OnePlusOne\"\n        }\n    ],\n    \"OpenTuner-BanditA-New\": [\n        \"opentuner_optimizer.py\",\n        {\n            \"techniques\": [\n                \"AUCBanditMetaTechniqueA\"\n            ]\n        }\n    ],\n    \"OpenTuner-GA-DE-New\": [\n        \"opentuner_optimizer.py\",\n        {\n            \"techniques\": [\n                \"PSO_GA_DE\"\n            ]\n        }\n    ],\n    \"OpenTuner-GA-New\": [\n        \"opentuner_optimizer.py\",\n        {\n            \"techniques\": [\n                \"PSO_GA_Bandit\"\n            ]\n        }\n    ],\n    \"PySOT-New\": [\n        \"pysot_optimizer.py\",\n        {}\n    ],\n    \"RandomSearch-New\": [\n        \"random_optimizer.py\",\n        {}\n    ],\n    \"Scikit-GBRT-Hedge-New\": [\n        \"scikit_optimizer.py\",\n        {\n            \"acq_func\": \"gp_hedge\",\n            \"base_estimator\": \"GBRT\",\n            \"n_initial_points\": 5\n        }\n    ],\n    \"Scikit-GP-Hedge-New\": [\n        \"scikit_optimizer.py\",\n        {\n            \"acq_func\": \"gp_hedge\",\n            \"base_estimator\": \"GP\",\n            \"n_initial_points\": 5\n        }\n    ],\n    \"Scikit-GP-LCB-New\": [\n        \"scikit_optimizer.py\",\n        {\n            \"acq_func\": \"LCB\",\n            \"base_estimator\": \"GP\",\n            \"n_initial_points\": 5\n        }\n    ]\n}\n"
  },
  {
    "path": "example_opt_root/flaky_optimizer.py",
    "content": "from time import sleep\n\nimport bayesmark.random_search as rs\nfrom bayesmark import np_util\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\n\n\nclass FlakyOptimizer(AbstractOptimizer):\n    def __init__(self, api_config, random=np_util.random):\n        \"\"\"Build wrapper class to use random search function in benchmark.\n\n        Settings for `suggest_dict` can be passed using kwargs.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n        self.mode = self.random.choice([\"normal\", \"crash\", \"delay\"])\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get suggestion.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        if self.random.rand() <= 0.5 or self.mode == \"normal\":\n            x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n        elif self.mode == \"delay\":\n            sleep(15 * 60)  # 15 minutes\n            x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n        elif self.mode == \"crash\":\n            assert False, \"Crashing for testing purposes\"\n        else:\n            assert False, \"Crashing, not for testing purposes\"\n\n        return x_guess\n\n    def observe(self, X, y):\n        \"\"\"Feed an observation back.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        # Random search so don't do anything\n        pass\n\n\nif __name__ == \"__main__\":\n    experiment_main(FlakyOptimizer)\n"
  },
  {
    "path": "example_opt_root/hyperopt_optimizer.py",
    "content": "import numpy as np\nfrom hyperopt import hp, tpe\nfrom hyperopt.base import JOB_STATE_DONE, JOB_STATE_NEW, STATUS_OK, Domain, Trials\nfrom scipy.interpolate import interp1d\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\nfrom bayesmark.np_util import random as np_random\nfrom bayesmark.np_util import random_seed\n\n# Sklearn prefers str to unicode:\nDTYPE_MAP = {\"real\": float, \"int\": int, \"bool\": bool, \"cat\": str, \"ordinal\": str}\n\n\ndef dummy_f(x):\n    assert False, \"This is a placeholder, it should never be called.\"\n\n\ndef only(x):\n    y, = x\n    return y\n\n\nclass HyperoptOptimizer(AbstractOptimizer):\n    primary_import = \"hyperopt\"\n\n    def __init__(self, api_config, random=np_random):\n        \"\"\"Build wrapper class to use hyperopt optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. 
See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n\n        space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config)\n        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)\n        self.trials = Trials()\n\n        # Some book keeping like opentuner wrapper\n        self.trial_id_lookup = {}\n\n        # Store just for data validation\n        self.param_set_chk = frozenset(api_config.keys())\n\n    @staticmethod\n    def hashable_dict(d):\n        \"\"\"A custom function for hashing dictionaries.\n\n        Parameters\n        ----------\n        d : dict or dict-like\n            The dictionary to be converted to immutable/hashable type.\n\n        Returns\n        -------\n        hashable_object : frozenset of tuple pairs\n            Bijective equivalent to dict that can be hashed.\n        \"\"\"\n        hashable_object = frozenset(d.items())\n        return hashable_object\n\n    @staticmethod\n    def get_hyperopt_dimensions(api_config):\n        \"\"\"Help routine to setup hyperopt search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        space = {}\n        round_to_values = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            # Some setup for case that whitelist of values is provided:\n            values_only_type = param_type in (\"cat\", \"ordinal\")\n            if (param_values is not None) and (not 
values_only_type):\n                assert param_range is None\n                param_values = np.unique(param_values)\n                param_range = (param_values[0], param_values[-1])\n                round_to_values[param_name] = interp1d(\n                    param_values, param_values, kind=\"nearest\", fill_value=\"extrapolate\"\n                )\n\n            if param_type == \"int\":\n                low, high = param_range\n                if param_space in (\"log\", \"logit\"):\n                    space[param_name] = hp.qloguniform(param_name, np.log(low), np.log(high), 1)\n                else:\n                    space[param_name] = hp.quniform(param_name, low, high, 1)\n            elif param_type == \"bool\":\n                assert param_range is None\n                assert param_values is None\n                space[param_name] = hp.choice(param_name, (False, True))\n            elif param_type in (\"cat\", \"ordinal\"):\n                assert param_range is None\n                space[param_name] = hp.choice(param_name, param_values)\n            elif param_type == \"real\":\n                low, high = param_range\n                if param_space in (\"log\", \"logit\"):\n                    space[param_name] = hp.loguniform(param_name, np.log(low), np.log(high))\n                else:\n                    space[param_name] = hp.uniform(param_name, low, high)\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n\n        return space, round_to_values\n\n    def get_trial(self, trial_id):\n        for trial in self.trials._dynamic_trials:\n            if trial[\"tid\"] == trial_id:\n                assert isinstance(trial, dict)\n                # Make sure right kind of dict\n                assert \"state\" in trial and \"result\" in trial\n                assert trial[\"state\"] == JOB_STATE_NEW\n                return trial\n        assert False, \"No matching trial ID\"\n\n    def 
cleanup_guess(self, x_guess):\n        assert isinstance(x_guess, dict)\n        # Also, check the keys are only the vars we are searching over:\n        assert frozenset(x_guess.keys()) == self.param_set_chk\n\n        # Do the rounding\n        # Make a copy to be safe, and also unpack singletons\n        # We may also need to consider clip_chk at some point like opentuner\n        x_guess = {k: only(x_guess[k]) for k in x_guess}\n        for param_name, round_f in self.round_to_values.items():\n            x_guess[param_name] = round_f(x_guess[param_name])\n        # Also ensure this is correct dtype so sklearn is happy\n        x_guess = {k: DTYPE_MAP[self.api_config[k][\"type\"]](x_guess[k]) for k in x_guess}\n        return x_guess\n\n    def _suggest(self):\n        \"\"\"Helper function to `suggest` that does the work of calling\n        `hyperopt` via its dumb API.\n        \"\"\"\n        new_ids = self.trials.new_trial_ids(1)\n        assert len(new_ids) == 1\n        self.trials.refresh()\n\n        seed = random_seed(self.random)\n        new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed)\n        assert len(new_trials) == 1\n\n        self.trials.insert_trial_docs(new_trials)\n        self.trials.refresh()\n\n        new_trial, = new_trials  # extract singleton\n        return new_trial\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Make `n_suggestions` suggestions for what to evaluate next.\n\n        This requires the user observe all previous suggestions before calling\n        again.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            The number of suggestions to return.\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        assert n_suggestions >= 1, \"invalid value for n_suggestions\"\n\n        # Get the new trials, it seems hyperopt either uses random search or\n        # guesses one at a time anyway, so we might as well call serially.\n        new_trials = [self._suggest() for _ in range(n_suggestions)]\n\n        X = []\n        for trial in new_trials:\n            x_guess = self.cleanup_guess(trial[\"misc\"][\"vals\"])\n            X.append(x_guess)\n\n            # Build lookup to get original trial object\n            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)\n            assert x_guess_ not in self.trial_id_lookup, \"the suggestions should not already be in the trial dict\"\n            self.trial_id_lookup[x_guess_] = trial[\"tid\"]\n\n        assert len(X) == n_suggestions\n        return X\n\n    def observe(self, X, y):\n        \"\"\"Feed the observations back to hyperopt.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated.\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_guess, y_ in zip(X, y):\n            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)\n            assert x_guess_ in self.trial_id_lookup, \"Appears to be guess that did not originate from suggest\"\n\n            trial_id = self.trial_id_lookup.pop(x_guess_)\n            trial = self.get_trial(trial_id)\n            assert self.cleanup_guess(trial[\"misc\"][\"vals\"]) == x_guess, \"trial ID not consistent with x values stored\"\n\n            # Cast to float to ensure native type\n            result = {\"loss\": float(y_), 
\"status\": STATUS_OK}\n            trial[\"state\"] = JOB_STATE_DONE\n            trial[\"result\"] = result\n        # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end\n        # of loop of a bunch of evals, so we will do the same thing here.\n        self.trials.refresh()\n\n\nif __name__ == \"__main__\":\n    experiment_main(HyperoptOptimizer)\n"
  },
  {
    "path": "example_opt_root/nevergrad_optimizer.py",
    "content": "import nevergrad.optimization as optimization\nimport numpy as np\nfrom nevergrad import instrumentation as inst\nfrom scipy.stats import norm\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\nfrom bayesmark.np_util import linear_rescale\nfrom bayesmark.space import Real\n\n\nclass NevergradOptimizer(AbstractOptimizer):\n    primary_import = \"nevergrad\"\n\n    def __init__(self, api_config, tool=\"OnePlusOne\", budget=300):\n        \"\"\"Build wrapper class to use nevergrad optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        budget : int\n            Expected number of max function evals\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)\n\n        dimension = self.instrum.dimension\n        opt_class = optimization.registry[tool]\n        self.optim = opt_class(dimension=dimension, budget=budget)\n\n    @staticmethod\n    def get_nvg_dimensions(api_config):\n        \"\"\"Help routine to setup nevergrad search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistnent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        all_args = {}\n        all_prewarp = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            prewarp = None\n            if param_type == 
\"cat\":\n                assert param_space is None\n                assert param_range is None\n                arg = inst.var.SoftmaxCategorical(param_values)\n            elif param_type == \"bool\":\n                assert param_space is None\n                assert param_range is None\n                assert param_values is None\n                arg = inst.var.OrderedDiscrete([False, True])\n            elif param_values is not None:\n                assert param_type in (\"int\", \"ordinal\", \"real\")\n                arg = inst.var.OrderedDiscrete(param_values)\n                # We are throwing away information here, but OrderedDiscrete\n                # appears to be invariant to monotonic transformation anyway.\n            elif param_type == \"int\":\n                assert param_values is None\n                # Need +1 since API in inclusive\n                choices = range(int(param_range[0]), int(param_range[-1]) + 1)\n                arg = inst.var.OrderedDiscrete(choices)\n                # We are throwing away information here, but OrderedDiscrete\n                # appears to be invariant to monotonic transformation anyway.\n            elif param_type == \"real\":\n                assert param_values is None\n                assert param_range is not None\n                # Will need to warp to this space sep.\n                arg = inst.var.Gaussian(mean=0, std=1)\n                prewarp = Real(warp=param_space, range_=param_range)\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n\n            all_args[param_name] = arg\n            all_prewarp[param_name] = prewarp\n        instrum = inst.Instrumentation(**all_args)\n        return instrum, all_prewarp\n\n    def prewarp(self, xx):\n        \"\"\"Extra work needed to get variables into the Gaussian space\n        representation.\"\"\"\n        xxw = {}\n        for arg_name, vv in xx.items():\n            assert np.isscalar(vv)\n            
space = self.space[arg_name]\n\n            if space is not None:\n                # Warp so we think it is apriori uniform in [a, b]\n                vv = space.warp(vv)\n                assert vv.size == 1\n\n                # Now make uniform on [0, 1], also unpack warped to scalar\n                (lb, ub), = space.get_bounds()\n                vv = linear_rescale(vv.item(), lb, ub, 0, 1)\n\n                # Now make std Gaussian apriori\n                vv = norm.ppf(vv)\n            assert np.isscalar(vv)\n            xxw[arg_name] = vv\n        return xxw\n\n    def postwarp(self, xxw):\n        \"\"\"Extra work needed to undo the Gaussian space representation.\"\"\"\n        xx = {}\n        for arg_name, vv in xxw.items():\n            assert np.isscalar(vv)\n            space = self.space[arg_name]\n\n            if space is not None:\n                # Now make std Gaussian apriori\n                vv = norm.cdf(vv)\n\n                # Now make uniform on [0, 1]\n                (lb, ub), = space.get_bounds()\n                vv = linear_rescale(vv, 0, 1, lb, ub)\n\n                # Warp so we think it is apriori uniform in [a, b]\n                vv = space.unwarp([vv])\n            assert np.isscalar(vv)\n            xx[arg_name] = vv\n        return xx\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get suggestion from nevergrad.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        x_guess_data = [self.optim.ask() for _ in range(n_suggestions)]\n\n        x_guess = [None] * n_suggestions\n        for ii, xx in enumerate(x_guess_data):\n            x_pos, x_kwarg = self.instrum.data_to_arguments(xx)\n            assert x_pos == ()\n            x_guess[ii] = self.postwarp(x_kwarg)\n\n        return x_guess\n\n    def observe(self, X, y):\n        \"\"\"Feed an observation back to nevergrad.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        for xx, yy in zip(X, y):\n            xx = self.prewarp(xx)\n            xx = self.instrum.arguments_to_data(**xx)\n            self.optim.tell(xx, yy)\n\n\nif __name__ == \"__main__\":\n    experiment_main(NevergradOptimizer)\n"
  },
  {
    "path": "example_opt_root/opentuner_optimizer.py",
    "content": "\"\"\"\nIn opentuner, many search techniques are already available. All the names of\nthe techniques can be found as follows:\n```\n>>> import opentuner\n>>> techniques, generators = opentuner.search.technique.all_techniques()\n>>> for t in techniques:\n...     print t.name\n```\nA user can also create new search techniques\n(http://opentuner.org/tutorial/techniques/).\n\nOpentuner will create a multi-arm bandit of multiple techniques if more than\none technique is specified in `args.technique`.\n\nSome bandits with pre-defined techniques are already registered in:\n`opentuner.search.bandittechniques`\n\nBy default, we use a pre-defined bandit called `'AUCBanditMetaTechniqueA'` of 4\ntechniques:\n```\nregister(AUCBanditMetaTechnique([\n        differentialevolution.DifferentialEvolutionAlt(),\n        evolutionarytechniques.UniformGreedyMutation(),\n        evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3),\n        simplextechniques.RandomNelderMead()],\n        name='AUCBanditMetaTechniqueA'))\n```\nThe other two bandits used in our experiments are: PSO_GA_DE and PSO_GA_Bandit.\nSpecifying a list of multiple techniques will use a multi-arm bandit over them.\n\"\"\"\nimport warnings\nfrom argparse import Namespace\n\nimport opentuner.tuningrunmain\nfrom opentuner.api import TuningRunManager\nfrom opentuner.measurement.interface import DefaultMeasurementInterface as DMI\nfrom opentuner.resultsdb.models import DesiredResult, Result\nfrom opentuner.search.manipulator import (\n    ConfigurationManipulator,\n    EnumParameter,\n    FloatParameter,\n    IntegerParameter,\n    LogFloatParameter,\n    LogIntegerParameter,\n    ScaledNumericParameter,\n)\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\nfrom bayesmark.np_util import clip_chk\n\nDEFAULT_TECHNIQUES = (\"AUCBanditMetaTechniqueA\",)\nMEMORY_ONLY_DB = \"sqlite://\"\n\n# Monkey patch here! 
Opentuner is messed up, TuningRunMain changes global log\n# settings. We should file an issue report here and have them fix it.\nopentuner.tuningrunmain.init_logging = lambda: None\n\n\ndef ClippedParam(cls, epsilon=1e-5):\n    \"\"\"Build wrapper class of opentuner parameter class that uses clip check to\n    keep parameters in the allowed range despite numerical errors.\n\n    Class built on `ScaledNumericParameter` abstract class defined in:\n    `opentuner.search.manipulator.ScaledNumericParameter`.\n\n    Parameters\n    ----------\n    cls : ScaledNumericParameter\n        Opentuner parameter class, such as `LogFloatParameter` or\n        `FloatParameter`, which transforms the domain of parameter.\n\n    Returns\n    -------\n    StableClass : ScaledNumericParameter\n        New class equivalent to original `cls` but it overwrites the original\n        `_unscale` method to enforce a clip check to keep the parameters within\n        their allowed range.\n    \"\"\"\n    assert issubclass(\n        cls, ScaledNumericParameter\n    ), \"this class cls should inherit from the ScaledNumericParameter class\"\n\n    class StableClass(cls):\n        def _unscale(self, v):\n            unscaled_v = super(StableClass, self)._unscale(v)\n            unscaled_v = clip_chk(unscaled_v, self.min_value, self.max_value)\n            return unscaled_v\n\n    return StableClass\n\n\nclass OpentunerOptimizer(AbstractOptimizer):\n    primary_import = \"opentuner\"\n\n    def __init__(self, api_config, techniques=DEFAULT_TECHNIQUES, n_suggestions=1):\n        \"\"\"Build wrapper class to use opentuner optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n\n        techniques : iterable of strings\n            A list or tuple of techniques to use in opentuner. If the list\n            has only one technique, then that technique will be used. 
If the\n            list has multiple techniques a bandit over those techniques\n            will be used.\n\n        n_suggestions : int\n            Default number of suggestions to be made in parallel.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        # Opentuner requires DesiredResult to reference suggestion when making\n        # its observation. x_to_dr maps the dict suggestion to DesiredResult.\n        self.x_to_dr = {}\n        # Keep last suggested x and repeat it whenever opentuner gives up.\n        self.dummy_suggest = None\n\n        \"\"\"Setting up the arguments for opentuner. You can see all possible\n        arguments using:\n        ```\n        >>> import opentuner\n        >>> opentuner.default_argparser().parse_args(['-h'])\n        ```\n        We only change a few arguments (other arguments are set to defaults):\n        * database = MEMORY_ONLY_DB: to use an in-memory sqlite database\n        * parallelism = n_suggestions: num of suggestions to give in parallel\n        * technique = techniques: a list of techniques to be used by opentuner\n        * print_params = False: to avoid opentuner from exiting after printing\n            param spaces\n        \"\"\"\n        args = Namespace(\n            bail_threshold=500,\n            database=MEMORY_ONLY_DB,\n            display_frequency=10,\n            generate_bandit_technique=False,\n            label=None,\n            list_techniques=False,\n            machine_class=None,\n            no_dups=False,\n            parallel_compile=False,\n            parallelism=n_suggestions,\n            pipelining=0,\n            print_params=False,\n            print_search_space_size=False,\n            quiet=False,\n            results_log=None,\n            results_log_details=None,\n            seed_configuration=[],\n            stop_after=None,\n            technique=techniques,\n            test_limit=5000,\n        )\n\n        # Setup some dummy classes 
required by opentuner to actually run.\n        manipulator = OpentunerOptimizer.build_manipulator(api_config)\n        interface = DMI(args=args, manipulator=manipulator)\n        self.api = TuningRunManager(interface, args)\n\n    @staticmethod\n    def hashable_dict(d):\n        \"\"\"A custom function for hashing dictionaries.\n\n        Parameters\n        ----------\n        d : dict or dict-like\n            The dictionary to be converted to immutable/hashable type.\n\n        Returns\n        -------\n        hashable_object : frozenset of tuple pairs\n            Bijective equivalent to dict that can be hashed.\n        \"\"\"\n        hashable_object = frozenset(d.items())\n        return hashable_object\n\n    @staticmethod\n    def build_manipulator(api_config):\n        \"\"\"Build a ConfigurationManipulator object to be used by opentuner.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. 
See API description.\n\n        Returns\n        -------\n        manipulator : ConfigurationManipulator\n            Some over complexified class required by opentuner to run.\n        \"\"\"\n        manipulator = ConfigurationManipulator()\n\n        for pname in api_config:\n            ptype = api_config[pname][\"type\"]\n            pspace = api_config[pname].get(\"space\", None)\n            pmin, pmax = api_config[pname].get(\"range\", (None, None))\n\n            if ptype == \"real\":\n                if pspace in (\"linear\", \"logit\"):\n                    ot_param = FloatParameter(pname, pmin, pmax)\n                elif pspace in (\"log\", \"bilog\"):\n                    LogFloatParameter_ = ClippedParam(LogFloatParameter)\n                    ot_param = LogFloatParameter_(pname, pmin, pmax)\n                else:\n                    assert False, \"unsupported param space = %s\" % pspace\n            elif ptype == \"int\":\n                if pspace in (\"linear\", \"logit\"):\n                    ot_param = IntegerParameter(pname, pmin, pmax)\n                elif pspace in (\"log\", \"bilog\"):\n                    ot_param = LogIntegerParameter(pname, pmin, pmax)\n                else:\n                    assert False, \"unsupported param space = %s\" % pspace\n            elif ptype == \"bool\":\n                # The actual bool parameter seems not to work in Py3 :(\n                ot_param = IntegerParameter(pname, 0, 1)\n            elif ptype in (\"cat\", \"ordinal\"):\n                # Treat ordinal and categorical variables the same for now.\n                assert \"values\" in api_config[pname]\n                pvalues = api_config[pname][\"values\"]\n                ot_param = EnumParameter(pname, pvalues)\n            else:\n                assert False, \"type=%s/space=%s not handled in opentuner yet\" % (ptype, pspace)\n            manipulator.add_parameter(ot_param)\n        return manipulator\n\n    def suggest(self, 
n_suggestions=1):\n        \"\"\"Make `n_suggestions` suggestions for what to evaluate next.\n\n        This requires the user observe all previous suggestions before calling\n        again.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            The number of suggestions to return.\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        assert n_suggestions >= 1, \"invalid value for n_suggestions\"\n\n        # Update the n_suggestions if it is different from the current setting.\n        if self.api.search_driver.args.parallelism != n_suggestions:\n            self.api.search_driver.args.parallelism = n_suggestions\n            warnings.warn(\"n_suggestions changed across suggest calls\")\n\n        # Require the user to already observe all previous suggestions.\n        # Otherwise, opentuner will just recycle old suggestions.\n        assert len(self.x_to_dr) == 0, \"all the previous suggestions should have been observed by now\"\n\n        # The real meat of suggest from opentuner: Get next `n_suggestions`\n        # unique suggestions.\n        desired_results = [self.api.get_next_desired_result() for _ in range(n_suggestions)]\n\n        # Save DesiredResult object in dict since observe will need it.\n        X = []\n        using_dummy_suggest = False\n        for ii in range(n_suggestions):\n            # Opentuner can give up, but the API requires guessing forever.\n            if desired_results[ii] is None:\n                assert self.dummy_suggest is not None, \"opentuner gave up on the first call!\"\n                # Use the dummy suggestion in this case.\n                X.append(self.dummy_suggest)\n                using_dummy_suggest = True\n                continue\n\n            
# Get the simple dict equivalent to suggestion.\n            x_guess = desired_results[ii].configuration.data\n            X.append(x_guess)\n\n            # Now save the desired result for future use in observe.\n            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)\n            assert x_guess_ not in self.x_to_dr, \"the suggestions should not already be in the x_to_dr dict\"\n            self.x_to_dr[x_guess_] = desired_results[ii]\n            # This will also catch None from opentuner.\n            assert isinstance(self.x_to_dr[x_guess_], DesiredResult)\n\n        assert len(X) == n_suggestions, \"incorrect number of suggestions provided by opentuner\"\n        # Log suggestion for repeating if opentuner gives up next time. We can\n        # only do this when it is not already being used since we will be\n        # checking guesses against dummy_suggest in observe.\n        if not using_dummy_suggest:\n            self.dummy_suggest = X[-1]\n        return X\n\n    def observe(self, X, y):\n        \"\"\"Feed the observations back to opentuner.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated.\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_guess, y_ in zip(X, y):\n            x_guess_ = OpentunerOptimizer.hashable_dict(x_guess)\n\n            # If we can't find the dr object then it must be the dummy guess.\n            if x_guess_ not in self.x_to_dr:\n                assert x_guess == self.dummy_suggest, \"Appears to be guess that did not originate from suggest\"\n                continue\n\n            # Get the corresponding DesiredResult object.\n            dr = self.x_to_dr.pop(x_guess_, None)\n          
  # This will also catch None from opentuner.\n            assert isinstance(dr, DesiredResult), \"DesiredResult object not available in x_to_dr\"\n\n            # Opentuner's arg names assume we are minimizing execution time.\n            # So, if we want to minimize we have to pretend y is a 'time'.\n            result = Result(time=y_)\n            self.api.report_result(dr, result)\n\n\nif __name__ == \"__main__\":\n    experiment_main(OpentunerOptimizer)\n"
  },
  {
    "path": "example_opt_root/pysot_optimizer.py",
    "content": "import warnings\nfrom copy import copy\n\nimport numpy as np\nfrom poap.strategy import EvalRecord\nfrom pySOT.experimental_design import SymmetricLatinHypercube\nfrom pySOT.optimization_problems import OptimizationProblem\nfrom pySOT.strategy import SRBFStrategy\nfrom pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\nfrom bayesmark.space import JointSpace\n\n\nclass PySOTOptimizer(AbstractOptimizer):\n    primary_import = \"pysot\"\n\n    def __init__(self, api_config):\n        \"\"\"Build wrapper class to use an optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        self.space_x = JointSpace(api_config)\n        self.bounds = self.space_x.get_bounds()\n        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)\n        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int\n        self.batch_size = None\n        self.history = []\n        self.proposals = []\n\n    def create_opt_prob(self):\n        \"\"\"Create an optimization problem object.\"\"\"\n        opt = OptimizationProblem()\n        opt.lb = self.bounds[:, 0]  # In warped space\n        opt.ub = self.bounds[:, 1]  # In warped space\n        opt.dim = len(self.bounds)\n        opt.cont_var = np.arange(len(self.bounds))\n        opt.int_var = []\n        assert len(opt.cont_var) + len(opt.int_var) == opt.dim\n        opt.objfun = None\n        self.opt = opt\n\n    def start(self, max_evals):\n        \"\"\"Starts a new pySOT run.\"\"\"\n        self.history = []\n        self.proposals = []\n\n        # Symmetric Latin hypercube design\n        des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])\n     
   slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)\n\n        # Warped RBF interpolant\n        rbf = RBFInterpolant(\n            dim=self.opt.dim,\n            lb=self.opt.lb,\n            ub=self.opt.ub,\n            kernel=CubicKernel(),\n            tail=LinearTail(self.opt.dim),\n            eta=1e-4,\n        )\n\n        # Optimization strategy\n        self.strategy = SRBFStrategy(\n            max_evals=self.max_evals,\n            opt_prob=self.opt,\n            exp_design=slhd,\n            surrogate=rbf,\n            asynchronous=True,\n            batch_size=1,\n            use_restarts=True,\n        )\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get a suggestion from the optimizer.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n\n        if self.batch_size is None:  # First call to suggest\n            self.batch_size = n_suggestions\n            self.start(self.max_evals)\n\n        # Set the tolerances pretending like we are running batch\n        d, p = float(self.opt.dim), float(n_suggestions)\n        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))\n\n        # Now we can make suggestions\n        x_w = []\n        self.proposals = []\n        for _ in range(n_suggestions):\n            proposal = self.strategy.propose_action()\n            record = EvalRecord(proposal.args, status=\"pending\")\n            proposal.record = record\n            proposal.accept()  # This triggers all the callbacks\n\n            # It is possible that pySOT proposes a previously evaluated point\n            # when all variables are integers, so we just abort in this case\n            # since we have likely converged anyway. 
See PySOT issue #30.\n            x = list(proposal.record.params)  # From tuple to list\n            x_unwarped, = self.space_x.unwarp(x)\n            if x_unwarped in self.history:\n                warnings.warn(\"pySOT proposed the same point twice\")\n                self.start(self.max_evals)\n                return self.suggest(n_suggestions=n_suggestions)\n\n            # NOTE: Append unwarped to avoid rounding issues\n            self.history.append(copy(x_unwarped))\n            self.proposals.append(proposal)\n            x_w.append(copy(x_unwarped))\n\n        return x_w\n\n    def _observe(self, x, y):\n        # Find the matching proposal and execute its callbacks\n        idx = [x == xx for xx in self.history]\n        i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties\n        proposal = self.proposals[i]\n        proposal.record.complete(y)\n        self.proposals.pop(i)\n        self.history.pop(i)\n\n    def observe(self, X, y):\n        \"\"\"Send an observation of a suggestion back to the optimizer.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        assert len(X) == len(y)\n\n        for x_, y_ in zip(X, y):\n            # Just ignore, any inf observations we got, unclear if right thing\n            if np.isfinite(y_):\n                self._observe(x_, y_)\n\n\nif __name__ == \"__main__\":\n    experiment_main(PySOTOptimizer)\n"
  },
  {
    "path": "example_opt_root/random_optimizer.py",
    "content": "import bayesmark.random_search as rs\nfrom bayesmark import np_util\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\n\n\nclass RandomOptimizer(AbstractOptimizer):\n    # Unclear what is best package to list for primary_import here.\n    primary_import = \"bayesmark\"\n\n    def __init__(self, api_config, random=np_util.random):\n        \"\"\"Build wrapper class to use random search function in benchmark.\n\n        Settings for `suggest_dict` can be passed using kwargs.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get suggestion.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. 
Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n        return x_guess\n\n    def observe(self, X, y):\n        \"\"\"Feed an observation back.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        # Random search so don't do anything\n        pass\n\n\nif __name__ == \"__main__\":\n    experiment_main(RandomOptimizer)\n"
  },
  {
    "path": "example_opt_root/scikit_optimizer.py",
    "content": "import numpy as np\nfrom scipy.interpolate import interp1d\nfrom skopt import Optimizer as SkOpt\nfrom skopt.space import Categorical, Integer, Real\n\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.experiment import experiment_main\n\n\nclass ScikitOptimizer(AbstractOptimizer):\n    primary_import = \"scikit-optimize\"\n\n    def __init__(self, api_config, base_estimator=\"GP\", acq_func=\"gp_hedge\", n_initial_points=5):\n        \"\"\"Build wrapper class to use an optimizer in benchmark.\n\n        Parameters\n        ----------\n        api_config : dict-like of dict-like\n            Configuration of the optimization variables. See API description.\n        base_estimator : {'GP', 'RF', 'ET', 'GBRT'}\n            How to estimate the objective function.\n        acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}\n            Acquisition objective to decide next suggestion.\n        n_initial_points : int\n            Number of points to sample randomly before actual Bayes opt.\n        \"\"\"\n        AbstractOptimizer.__init__(self, api_config)\n\n        dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config)\n\n        # Older versions of skopt don't copy over the dimensions names during\n        # normalization and hence the names are missing in\n        # self.skopt.space.dimensions. Therefore, we save our own copy of\n        # dimensions list to be safe. 
If we can commit to using the newer\n        # versions of skopt we can delete self.dimensions.\n        self.dimensions_list = tuple(dd.name for dd in dimensions)\n\n        self.skopt = SkOpt(\n            dimensions,\n            n_initial_points=n_initial_points,\n            base_estimator=base_estimator,\n            acq_func=acq_func,\n            acq_optimizer=\"auto\",\n            acq_func_kwargs={},\n            acq_optimizer_kwargs={},\n        )\n\n    @staticmethod\n    def get_sk_dimensions(api_config, transform=\"normalize\"):\n        \"\"\"Help routine to setup skopt search space in constructor.\n\n        Take api_config as argument so this can be static.\n        \"\"\"\n        # The ordering of iteration prob makes no difference, but just to be\n        # safe and consistnent with space.py, I will make sorted.\n        param_list = sorted(api_config.keys())\n\n        sk_dims = []\n        round_to_values = {}\n        for param_name in param_list:\n            param_config = api_config[param_name]\n\n            param_type = param_config[\"type\"]\n\n            param_space = param_config.get(\"space\", None)\n            param_range = param_config.get(\"range\", None)\n            param_values = param_config.get(\"values\", None)\n\n            # Some setup for case that whitelist of values is provided:\n            values_only_type = param_type in (\"cat\", \"ordinal\")\n            if (param_values is not None) and (not values_only_type):\n                assert param_range is None\n                param_values = np.unique(param_values)\n                param_range = (param_values[0], param_values[-1])\n                round_to_values[param_name] = interp1d(\n                    param_values, param_values, kind=\"nearest\", fill_value=\"extrapolate\"\n                )\n\n            if param_type == \"int\":\n                # Integer space in sklearn does not support any warping => Need\n                # to leave the warping as linear 
in skopt.\n                sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name))\n            elif param_type == \"bool\":\n                assert param_range is None\n                assert param_values is None\n                sk_dims.append(Integer(0, 1, transform=transform, name=param_name))\n            elif param_type in (\"cat\", \"ordinal\"):\n                assert param_range is None\n                # Leave x-form to one-hot as per skopt default\n                sk_dims.append(Categorical(param_values, name=param_name))\n            elif param_type == \"real\":\n                # Skopt doesn't support all our warpings, so need to pick\n                # closest substitute it does support.\n                prior = \"log-uniform\" if param_space in (\"log\", \"logit\") else \"uniform\"\n                sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name))\n            else:\n                assert False, \"type %s not handled in API\" % param_type\n        return sk_dims, round_to_values\n\n    def suggest(self, n_suggestions=1):\n        \"\"\"Get a suggestion from the optimizer.\n\n        Parameters\n        ----------\n        n_suggestions : int\n            Desired number of parallel suggestions in the output\n\n        Returns\n        -------\n        next_guess : list of dict\n            List of `n_suggestions` suggestions to evaluate the objective\n            function. Each suggestion is a dictionary where each key\n            corresponds to a parameter being optimized.\n        \"\"\"\n        # First get list of lists from skopt.ask()\n        next_guess = self.skopt.ask(n_points=n_suggestions)\n        # Then convert to list of dicts\n        next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]\n\n        # Now do the rounding, custom rounding is not supported in skopt. 
Note\n        # that there is not necessarily a round function for each dimension here.\n        for param_name, round_f in self.round_to_values.items():\n            for xx in next_guess:\n                xx[param_name] = round_f(xx[param_name])\n        return next_guess\n\n    def observe(self, X, y):\n        \"\"\"Send an observation of a suggestion back to the optimizer.\n\n        Parameters\n        ----------\n        X : list of dict-like\n            Places where the objective function has already been evaluated.\n            Each suggestion is a dictionary where each key corresponds to a\n            parameter being optimized.\n        y : array-like, shape (n,)\n            Corresponding values where objective has been evaluated\n        \"\"\"\n        # Supposedly skopt can handle blocks, but not sure about interface for\n        # that. Just do loop to be safe for now.\n        for xx, yy in zip(X, y):\n            # skopt needs lists instead of dicts\n            xx = [xx[dim_name] for dim_name in self.dimensions_list]\n            # Just ignore any inf observations we got; unclear if this is the right thing\n            if np.isfinite(yy):\n                self.skopt.tell(xx, yy)\n\n\nif __name__ == \"__main__\":\n    experiment_main(ScikitOptimizer)\n"
  },
  {
    "path": "integration_test.sh",
    "content": "#!/bin/bash\n\nset -ex\nset -o pipefail\n\n# Be able to check if using version out of tar ball\nwhich bayesmark-launch\nwhich bayesmark-exp\nwhich bayesmark-agg\nwhich bayesmark-anal\n\nDB_ROOT=./notebooks\nDBID=bo_example_folder\n\nbayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -d boston breast -v\nbayesmark-agg -dir $DB_ROOT -b $DBID\nbayesmark-anal -dir $DB_ROOT -b $DBID -v\n\n# Try ipynb export\npython -m ipykernel install --name=bobm_ipynb --user\njupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1\njupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1\n\n# Try dry run\nbayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -nj 50 -v\n\n# Try again but use the custom optimizers\nmv $DB_ROOT/$DBID old\nbayesmark-launch -n 15 -r 1 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -d boston breast -v\nbayesmark-agg -dir $DB_ROOT -b $DBID\nbayesmark-anal -dir $DB_ROOT -b $DBID -v\n\n# Export again\njupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1\njupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1\n\n# Try dry run\nbayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -nj 50 -v\n\necho \"success\"\n"
  },
  {
    "path": "integration_test_with_setup.sh",
    "content": "#!/bin/bash\n\nset -ex\nset -o pipefail\n\nexport PIP_REQUIRE_VIRTUALENV=false\n\n# Handy to know what we are working with\ngit --version\npython --version\npip freeze | sort\n\n# Cleanup workspace, src for any old -e installs\ngit clean -x -f -d\nrm -rf src/\n\n# See if opentuner will work in env (but this command does not work on Mac)\n# dpkg -l | grep libsqlite\n\n# Simulate deployment with wheel\n./build_wheel.sh\nmv -v dist/bayesmark-* dist/bayesmark.tar.gz\n\n# Install and run local optimizers\nmkdir install_test\ncp -r ./notebooks install_test\ncp -r ./example_opt_root install_test\n\ncd install_test\nvirtualenv bobm_ipynb --python=python3\nsource ./bobm_ipynb/bin/activate\npython --version\npip freeze | sort\n\n# Remove this if we want to make sure everything is compatible with latest\n# pip install -r ../requirements/optimizers.txt\n\npip install ../dist/bayesmark.tar.gz[optimizers,notebooks]\n../integration_test.sh\n\n# wrap up\ndeactivate\ncd ..\n\necho \"success with setup wrapper too\"\n"
  },
  {
    "path": "notebooks/dummy.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport ipykernel\nimport jupyter\nimport jupyter_core\nimport nbconvert\n\n# import extra deps and use it to keep pipreqs and flake8 happy\nfor pkg in (ipykernel, jupyter, jupyter_core, nbconvert):\n    print(\"%s %s\" % (pkg.__name__, pkg.__version__))\n"
  },
  {
    "path": "notebooks/plot_mean_score.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import numpy as np\\n\",\n    \"import matplotlib.pyplot as plt\\n\",\n    \"from matplotlib import cm, colors, rcParams\\n\",\n    \"\\n\",\n    \"import bayesmark.constants as cc\\n\",\n    \"import bayesmark.xr_util as xru\\n\",\n    \"from bayesmark.serialize import XRSerializer\\n\",\n    \"from bayesmark.constants import ITER, METHOD, ARG_DELIM, OBJECTIVE, VISIBLE_TO_OPT\\n\",\n    \"from bayesmark.path_util import abspath\\n\",\n    \"from bayesmark.util import preimage_func\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User settings, must specify location of the data to make plots here for this to run\\n\",\n    \"DB_ROOT = abspath(\\\".\\\")\\n\",\n    \"DBID = \\\"bo_example_folder\\\"\\n\",\n    \"metric_for_scoring = VISIBLE_TO_OPT\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Matplotlib setup\\n\",\n    \"# Note this will put type-3 font BS in the pdfs, if it matters\\n\",\n    \"rcParams[\\\"mathtext.fontset\\\"] = \\\"stix\\\"\\n\",\n    \"rcParams[\\\"font.family\\\"] = \\\"STIXGeneral\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def build_color_dict(names):\\n\",\n    \"    \\\"\\\"\\\"Make a color dictionary to give each name a mpl color.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    norm = colors.Normalize(vmin=0, vmax=1)\\n\",\n    \"    m = cm.ScalarMappable(norm, cm.tab20)\\n\",\n    \"    color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\\n\",\n    \"    color_dict = dict(zip(names, color_dict))\\n\",\n    \"    return color_dict\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Load the data\\n\",\n    \"summary_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.MEAN_SCORE)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"method_to_rgba = build_color_dict(summary_ds.coords[METHOD].values.tolist())\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Group methods by the package behind them\\n\",\n    \"method_only = lambda method_rev: method_rev.split(ARG_DELIM, 1)[0]\\n\",\n    \"groups = preimage_func(method_only, summary_ds.coords[METHOD].values)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Make a plot for each package\\n\",\n    \"for method_name in groups:\\n\",\n    \"    plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"    for method_ver_name in groups[method_name]:\\n\",\n    \"        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"        curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"        plt.fill_between(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.LB_MED].values,\\n\",\n    \"            curr_ds[cc.UB_MED].values,\\n\",\n    \"            color=method_to_rgba[method_ver_name],\\n\",\n    \"            alpha=0.5,\\n\",\n    \"        )\\n\",\n    \"        plt.plot(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.PERF_MED].values,\\n\",\n    \"            color=method_to_rgba[method_ver_name],\\n\",\n    \"            label=method_name,\\n\",\n    \"            marker=\\\".\\\",\\n\",\n    \"        )\\n\",\n    \"    plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n   
 \"    plt.ylabel(\\\"normalized median score\\\", fontsize=10)\\n\",\n    \"    plt.title(method_name)\\n\",\n    \"    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"    plt.grid()\\n\",\n    \"\\n\",\n    \"    plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"    for method_ver_name in groups[method_name]:\\n\",\n    \"        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"        curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"        plt.fill_between(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.LB_MEAN].values,\\n\",\n    \"            curr_ds[cc.UB_MEAN].values,\\n\",\n    \"            color=method_to_rgba[method_ver_name],\\n\",\n    \"            alpha=0.5,\\n\",\n    \"        )\\n\",\n    \"        plt.plot(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.PERF_MEAN].values,\\n\",\n    \"            color=method_to_rgba[method_ver_name],\\n\",\n    \"            label=method_name,\\n\",\n    \"            marker=\\\".\\\",\\n\",\n    \"        )\\n\",\n    \"    plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"    plt.ylabel(\\\"mean score\\\", fontsize=10)\\n\",\n    \"    plt.title(method_name)\\n\",\n    \"    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"    plt.grid()\\n\",\n    \"\\n\",\n    \"    plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"    for method_ver_name in groups[method_name]:\\n\",\n    \"        curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"        curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"        plt.fill_between(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.LB_NORMED_MEAN].values,\\n\",\n    \"            curr_ds[cc.UB_NORMED_MEAN].values,\\n\",\n    \"        
    color=method_to_rgba[method_ver_name],\\n\",\n    \"            alpha=0.5,\\n\",\n    \"        )\\n\",\n    \"        plt.plot(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.NORMED_MEAN].values,\\n\",\n    \"            color=method_to_rgba[method_ver_name],\\n\",\n    \"            label=method_name,\\n\",\n    \"            marker=\\\".\\\",\\n\",\n    \"        )\\n\",\n    \"    plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"    plt.ylabel(\\\"normalized mean score\\\", fontsize=10)\\n\",\n    \"    plt.title(method_name)\\n\",\n    \"    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"    plt.grid()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Make the summary plot\\n\",\n    \"plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"for method_ver_name in summary_ds.coords[METHOD].values:\\n\",\n    \"    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"    curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"    plt.fill_between(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.LB_MED].values,\\n\",\n    \"        curr_ds[cc.UB_MED].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        alpha=0.5,\\n\",\n    \"    )\\n\",\n    \"    plt.plot(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.PERF_MED].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        label=method_ver_name,\\n\",\n    \"        marker=\\\".\\\",\\n\",\n    \"    )\\n\",\n    \"plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"plt.ylabel(\\\"normalized median score\\\", fontsize=10)\\n\",\n    \"plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n   
 \"plt.grid()\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"for method_ver_name in summary_ds.coords[METHOD].values:\\n\",\n    \"    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"    curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"    plt.fill_between(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.LB_MEAN].values,\\n\",\n    \"        curr_ds[cc.UB_MEAN].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        alpha=0.5,\\n\",\n    \"    )\\n\",\n    \"    plt.plot(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.PERF_MEAN].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        label=method_ver_name,\\n\",\n    \"        marker=\\\".\\\",\\n\",\n    \"    )\\n\",\n    \"plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"plt.ylabel(\\\"mean score\\\", fontsize=10)\\n\",\n    \"plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"plt.grid()\\n\",\n    \"\\n\",\n    \"plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"for method_ver_name in summary_ds.coords[METHOD].values:\\n\",\n    \"    curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"    curr_ds.coords[ITER].values\\n\",\n    \"\\n\",\n    \"    plt.fill_between(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.LB_NORMED_MEAN].values,\\n\",\n    \"        curr_ds[cc.UB_NORMED_MEAN].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        alpha=0.5,\\n\",\n    \"    )\\n\",\n    \"    plt.plot(\\n\",\n    \"        curr_ds.coords[ITER].values,\\n\",\n    \"        curr_ds[cc.NORMED_MEAN].values,\\n\",\n    \"        color=method_to_rgba[method_ver_name],\\n\",\n    \"        label=method_ver_name,\\n\",\n    \"        
marker=\\\".\\\",\\n\",\n    \"    )\\n\",\n    \"plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"plt.ylabel(\\\"normalized mean score\\\", fontsize=10)\\n\",\n    \"plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"plt.grid()\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"bobm_ipynb\",\n   \"language\": \"python\",\n   \"name\": \"bobm_ipynb\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "notebooks/plot_test_case.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import matplotlib.pyplot as plt\\n\",\n    \"from matplotlib import cm, colors, rcParams\\n\",\n    \"\\n\",\n    \"import numpy as np\\n\",\n    \"\\n\",\n    \"import bayesmark.constants as cc\\n\",\n    \"from bayesmark.path_util import abspath\\n\",\n    \"from bayesmark.serialize import XRSerializer\\n\",\n    \"from bayesmark.constants import ITER, METHOD, TEST_CASE, OBJECTIVE, VISIBLE_TO_OPT\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# User settings, must specify location of the data to make plots here for this to run\\n\",\n    \"DB_ROOT = abspath(\\\".\\\")\\n\",\n    \"DBID = \\\"bo_example_folder\\\"\\n\",\n    \"metric_for_scoring = VISIBLE_TO_OPT\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Matplotlib setup\\n\",\n    \"# Note this will put type-3 font BS in the pdfs, if it matters\\n\",\n    \"rcParams[\\\"mathtext.fontset\\\"] = \\\"stix\\\"\\n\",\n    \"rcParams[\\\"font.family\\\"] = \\\"STIXGeneral\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def build_color_dict(names):\\n\",\n    \"    \\\"\\\"\\\"Make a color dictionary to give each name a mpl color.\\n\",\n    \"    \\\"\\\"\\\"\\n\",\n    \"    norm = colors.Normalize(vmin=0, vmax=1)\\n\",\n    \"    m = cm.ScalarMappable(norm, cm.tab20)\\n\",\n    \"    color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\\n\",\n    \"    color_dict = dict(zip(names, color_dict))\\n\",\n    \"    return color_dict\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n  
 \"source\": [\n    \"# Load the data\\n\",\n    \"agg_results_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.PERF_RESULTS)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Setup for plotting\\n\",\n    \"method_list = agg_results_ds.coords[METHOD].values\\n\",\n    \"method_to_rgba = build_color_dict(method_list.tolist())\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Make the plots for inidividual test functions\\n\",\n    \"for func_name in agg_results_ds.coords[TEST_CASE].values:\\n\",\n    \"    plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"    for method_name in method_list:\\n\",\n    \"        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"\\n\",\n    \"        plt.fill_between(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.LB_MED].values,\\n\",\n    \"            curr_ds[cc.UB_MED].values,\\n\",\n    \"            color=method_to_rgba[method_name],\\n\",\n    \"            alpha=0.5,\\n\",\n    \"        )\\n\",\n    \"        plt.plot(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.PERF_MED].values,\\n\",\n    \"            color=method_to_rgba[method_name],\\n\",\n    \"            label=method_name,\\n\",\n    \"            marker=\\\".\\\",\\n\",\n    \"        )\\n\",\n    \"    plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"    plt.ylabel(\\\"median score\\\", fontsize=10)\\n\",\n    \"    plt.title(func_name)\\n\",\n    \"    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"    plt.grid()\\n\",\n    \"\\n\",\n    \"    plt.figure(figsize=(5, 5), dpi=300)\\n\",\n    \"    for method_name in method_list:\\n\",\n    
\"        curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\\n\",\n    \"\\n\",\n    \"        plt.fill_between(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.LB_MEAN].values,\\n\",\n    \"            curr_ds[cc.UB_MEAN].values,\\n\",\n    \"            color=method_to_rgba[method_name],\\n\",\n    \"            alpha=0.5,\\n\",\n    \"        )\\n\",\n    \"        plt.plot(\\n\",\n    \"            curr_ds.coords[ITER].values,\\n\",\n    \"            curr_ds[cc.PERF_MEAN].values,\\n\",\n    \"            color=method_to_rgba[method_name],\\n\",\n    \"            label=method_name,\\n\",\n    \"            marker=\\\".\\\",\\n\",\n    \"        )\\n\",\n    \"    plt.xlabel(\\\"evaluation\\\", fontsize=10)\\n\",\n    \"    plt.ylabel(\\\"mean score\\\", fontsize=10)\\n\",\n    \"    plt.title(func_name)\\n\",\n    \"    plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\\\"upper left\\\", borderaxespad=0.0)\\n\",\n    \"    plt.grid()\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"bobm_ipynb\",\n   \"language\": \"python\",\n   \"name\": \"bobm_ipynb\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.6.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 2\n}\n"
  },
  {
    "path": "requirements/base.in",
    "content": "scipy==1.2.0\npandas==0.24.0\npathvalidate==0.29.0\nnumpy==1.16.1\nGitPython==2.1.11\nimportlib-metadata==0.18\nscikit-learn==0.20.2\nxarray==0.12.2\n"
  },
  {
    "path": "requirements/base.txt",
    "content": "# SHA1:7ebe4df9e60f001b676e74ae561d5dc3202c3dd0\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\ngitdb2==2.0.5             # via gitpython\ngitpython==2.1.11         # via -r requirements/base.in\nimportlib-metadata==0.18  # via -r requirements/base.in\nnumpy==1.16.1             # via -r requirements/base.in, pandas, scikit-learn, scipy, xarray\npandas==0.24.0            # via -r requirements/base.in, xarray\npathvalidate==0.29.0      # via -r requirements/base.in\npython-dateutil==2.8.0    # via pandas\npytz==2019.1              # via pandas\nscikit-learn==0.20.2      # via -r requirements/base.in\nscipy==1.2.0              # via -r requirements/base.in, scikit-learn\nsix==1.12.0               # via python-dateutil\nsmmap2==2.0.5             # via gitdb2\nxarray==0.12.2            # via -r requirements/base.in\nzipp==0.5.2               # via importlib-metadata\n\n# The following packages are considered to be unsafe in a requirements file:\n# setuptools\n"
  },
  {
    "path": "requirements/docs.in",
    "content": "-r base.in\nSphinx==2.1.2\n"
  },
  {
    "path": "requirements/docs.txt",
    "content": "# SHA1:cde26afc07f6c9c1c6cb169e125fc5142a0c59ae\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\n-r base.txt\nalabaster==0.7.12         # via sphinx\nattrs==19.1.0             # via packaging\nbabel==2.7.0              # via sphinx\ncertifi==2019.6.16        # via requests\nchardet==3.0.4            # via requests\ndocutils==0.15            # via sphinx\nidna==2.8                 # via requests\nimagesize==1.1.0          # via sphinx\njinja2==2.10.1            # via sphinx\nmarkupsafe==1.1.1         # via jinja2\npackaging==19.1           # via sphinx\npygments==2.4.2           # via sphinx\npyparsing==2.4.2          # via packaging\nrequests==2.22.0          # via sphinx\nsnowballstemmer==1.9.0    # via sphinx\nsphinx==2.1.2             # via -r requirements/docs.in\nsphinxcontrib-applehelp==1.0.1  # via sphinx\nsphinxcontrib-devhelp==1.0.1  # via sphinx\nsphinxcontrib-htmlhelp==1.0.2  # via sphinx\nsphinxcontrib-jsmath==1.0.1  # via sphinx\nsphinxcontrib-qthelp==1.0.2  # via sphinx\nsphinxcontrib-serializinghtml==1.1.3  # via sphinx\nurllib3==1.25.3           # via requests\n\n# The following packages are considered to be unsafe in a requirements file:\n# setuptools\n"
  },
  {
    "path": "requirements/ipynb.in",
    "content": "-r base.in\nipykernel==5.1.1\nnbconvert==5.6.0\njupyter==1.0.0\njupyter-core==4.6.0\nmatplotlib==3.1.1\nnumpy==1.16.1\n"
  },
  {
    "path": "requirements/ipynb.txt",
    "content": "# SHA1:6c16d140e48d7e7fa0e157c053953db7d76f0caf\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\n-r base.txt\nappnope==0.1.0            # via ipython\nattrs==19.1.0             # via jsonschema\nbackcall==0.1.0           # via ipython\nbleach==3.1.0             # via nbconvert\ncycler==0.10.0            # via matplotlib\ndecorator==4.4.0          # via ipython, traitlets\ndefusedxml==0.6.0         # via nbconvert\nentrypoints==0.3          # via nbconvert\nipykernel==5.1.1          # via -r requirements/ipynb.in, ipywidgets, jupyter, jupyter-console, notebook, qtconsole\nipython-genutils==0.2.0   # via nbformat, notebook, qtconsole, traitlets\nipython==7.6.1            # via ipykernel, ipywidgets, jupyter-console\nipywidgets==7.5.1         # via jupyter\njedi==0.14.1              # via ipython\njinja2==2.10.1            # via nbconvert, notebook\njsonschema==3.0.2         # via nbformat\njupyter-client==5.3.1     # via ipykernel, jupyter-console, notebook, qtconsole\njupyter-console==6.0.0    # via jupyter\njupyter-core==4.6.0       # via -r requirements/ipynb.in, jupyter-client, nbconvert, nbformat, notebook, qtconsole\njupyter==1.0.0            # via -r requirements/ipynb.in\nkiwisolver==1.1.0         # via matplotlib\nmarkupsafe==1.1.1         # via jinja2\nmatplotlib==3.1.1         # via -r requirements/ipynb.in\nmistune==0.8.4            # via nbconvert\nnbconvert==5.6.0          # via -r requirements/ipynb.in, jupyter, notebook\nnbformat==4.4.0           # via ipywidgets, nbconvert, notebook\nnotebook==6.0.1           # via jupyter, widgetsnbextension\npandocfilters==1.4.2      # via nbconvert\nparso==0.5.1              # via jedi\npexpect==4.7.0            # via ipython\npickleshare==0.7.5        # via ipython\nprometheus-client==0.7.1  # via notebook\nprompt-toolkit==2.0.9     # via ipython, jupyter-console\nptyprocess==0.6.0         # via pexpect, terminado\npygments==2.4.2           # 
via ipython, jupyter-console, nbconvert, qtconsole\npyparsing==2.4.2          # via matplotlib\npyrsistent==0.15.4        # via jsonschema\npyzmq==18.0.2             # via jupyter-client, notebook\nqtconsole==4.5.5          # via jupyter\nsend2trash==1.5.0         # via notebook\nterminado==0.8.2          # via notebook\ntestpath==0.4.2           # via nbconvert\ntornado==6.0.3            # via ipykernel, jupyter-client, notebook, terminado\ntraitlets==4.3.2          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook, qtconsole\nwcwidth==0.1.7            # via prompt-toolkit\nwebencodings==0.5.1       # via bleach\nwidgetsnbextension==3.5.1  # via ipywidgets\n\n# The following packages are considered to be unsafe in a requirements file:\n# setuptools\n"
  },
  {
    "path": "requirements/optimizers.in",
    "content": "-r base.in\nopentuner==0.8.2\nnumpy==1.16.1\nscipy==1.2.0\nnevergrad==0.1.4\nhyperopt==0.1.1\nPOAP==0.1.26\nscikit-optimize==0.5.2\npySOT==0.3.3\n"
  },
  {
    "path": "requirements/optimizers.txt",
    "content": "# SHA1:08174a35f9973427450f549131b4438e2f116a88\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\n-r base.txt\nargparse==1.4.0           # via opentuner\natomicwrites==1.3.0       # via pytest\nattrs==19.1.0             # via packaging, pytest\nbayesian-optimization==0.6.0  # via nevergrad\ncertifi==2019.6.16        # via requests\nchardet==3.0.4            # via requests\ncma==2.7.0                # via nevergrad\ncoverage==4.5.4           # via nevergrad\ncycler==0.10.0            # via matplotlib\ndecorator==4.4.0          # via networkx\ndill==0.3.0               # via pysot\nfn==0.4.3                 # via opentuner\nfuture==0.17.1            # via hyperopt, opentuner\ngenty==1.3.2              # via nevergrad\nhyperopt==0.1.1           # via -r requirements/optimizers.in\nidna==2.8                 # via requests\njoblib==0.13.2            # via nevergrad\nkiwisolver==1.1.0         # via matplotlib\nmatplotlib==3.1.1         # via nevergrad\nmore-itertools==7.2.0     # via pytest\nmypy-extensions==0.4.1    # via mypy\nmypy==0.720               # via nevergrad\nnetworkx==2.3             # via hyperopt\nnevergrad==0.1.4          # via -r requirements/optimizers.in\nnose-timer==0.7.5         # via nevergrad\nnose==1.3.7               # via nevergrad, nose-timer\nopentuner==0.8.2          # via -r requirements/optimizers.in\npackaging==19.1           # via pytest\npluggy==0.12.0            # via pytest\npoap==0.1.26              # via -r requirements/optimizers.in, pysot\npy==1.8.0                 # via pytest\npydoe2==1.2.0             # via pysot\npymongo==3.8.0            # via hyperopt\npyparsing==2.4.2          # via matplotlib, packaging\npysot==0.3.3              # via -r requirements/optimizers.in\npytest==5.0.1             # via pysot\nrequests==2.22.0          # via nevergrad\nscikit-optimize==0.5.2    # via -r requirements/optimizers.in\nsqlalchemy==1.3.8         # via 
opentuner\ntyped-ast==1.4.0          # via mypy\ntyping-extensions==3.7.4  # via mypy, nevergrad\nurllib3==1.25.3           # via requests\nwcwidth==0.1.7            # via pytest\nxlrd==1.2.0               # via nevergrad\nxlwt==1.3.0               # via nevergrad\n\n# The following packages are considered to be unsafe in a requirements file:\n# setuptools\n"
  },
  {
    "path": "requirements/pipreqs_edits.sed",
    "content": "/argparse/d\n/appnope/d\n/certifi/d\n/bayesmark/d\n"
  },
  {
    "path": "requirements/self.txt",
    "content": "bayesmark==0.0.8\n"
  },
  {
    "path": "requirements/test.in",
    "content": "-r base.in\n-r optimizers.in\nhypothesis==4.32.3\nhypothesis-gufunc==0.0.5rc2\nnumpy==1.16.1\npathvalidate==0.29.0\nscipy==1.2.0\nscikit-learn==0.20.2\nxarray==0.12.2\npytest==5.0.1\npytest-cov==2.7.1\n"
  },
  {
    "path": "requirements/test.txt",
    "content": "# SHA1:0dd8b5c26e6671e320706ddd399f6f62e19f3189\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\n-r base.txt\n-r optimizers.txt\nhypothesis-gufunc==0.0.5rc2  # via -r requirements/test.in\nhypothesis==4.32.3        # via -r requirements/test.in, hypothesis-gufunc\npytest-cov==2.7.1         # via -r requirements/test.in\n\n# The following packages are considered to be unsafe in a requirements file:\n# setuptools\n"
  },
  {
    "path": "requirements/tools.in",
    "content": "detect-secrets==0.12.5\nipykernel==5.1.1\nnbconvert==5.6.0\npip-compile-multi==1.4.0\npipreqs==0.4.9\npre-commit==1.15.2\npytest==5.0.1\n"
  },
  {
    "path": "requirements/tools.txt",
    "content": "# SHA1:08f4ed4790290aab315dd20169793be4f0a974af\n#\n# This file is autogenerated by pip-compile-multi\n# To update, run:\n#\n#    pip-compile-multi\n#\nappnope==0.1.0            # via ipython\naspy.yaml==1.3.0          # via pre-commit\natomicwrites==1.3.0       # via pytest\nattrs==19.1.0             # via jsonschema, packaging, pytest\nbackcall==0.1.0           # via ipython\nbleach==3.1.0             # via nbconvert\ncertifi==2019.6.16        # via requests\ncfgv==2.0.1               # via pre-commit\nchardet==3.0.4            # via requests\nclick==7.0                # via pip-compile-multi, pip-tools\ndecorator==4.4.0          # via ipython, traitlets\ndefusedxml==0.6.0         # via nbconvert\ndetect-secrets==0.12.5    # via -r requirements/tools.in\ndocopt==0.6.2             # via pipreqs\nentrypoints==0.3          # via nbconvert\nidentify==1.4.5           # via pre-commit\nidna==2.8                 # via requests\nimportlib-metadata==0.18  # via importlib-resources, pluggy, pre-commit, pytest\nimportlib-resources==2.0.1  # via pre-commit\nipykernel==5.1.1          # via -r requirements/tools.in\nipython-genutils==0.2.0   # via nbformat, traitlets\nipython==7.6.1            # via ipykernel\njedi==0.14.1              # via ipython\njinja2==2.10.1            # via nbconvert\njsonschema==3.0.2         # via nbformat\njupyter-client==5.3.1     # via ipykernel\njupyter-core==4.6.0       # via jupyter-client, nbconvert, nbformat\nmarkupsafe==1.1.1         # via jinja2\nmistune==0.8.4            # via nbconvert\nmore-itertools==7.2.0     # via pytest\nnbconvert==5.6.0          # via -r requirements/tools.in\nnbformat==4.4.0           # via nbconvert\nnodeenv==1.3.3            # via pre-commit\npackaging==19.1           # via pytest\npandocfilters==1.4.2      # via nbconvert\nparso==0.5.1              # via jedi\npexpect==4.7.0            # via ipython\npickleshare==0.7.5        # via ipython\npip-compile-multi==1.4.0  # via -r 
requirements/tools.in\npip-tools==5.0.0          # via pip-compile-multi\npipreqs==0.4.9            # via -r requirements/tools.in\npluggy==0.12.0            # via pytest\npre-commit==1.15.2        # via -r requirements/tools.in\nprompt-toolkit==2.0.9     # via ipython\nptyprocess==0.6.0         # via pexpect\npy==1.8.0                 # via pytest\npygments==2.4.2           # via ipython, nbconvert\npyparsing==2.4.2          # via packaging\npyrsistent==0.15.4        # via jsonschema\npytest==5.0.1             # via -r requirements/tools.in\npython-dateutil==2.8.0    # via jupyter-client\npyyaml==5.1.1             # via aspy.yaml, detect-secrets, pre-commit\npyzmq==18.0.2             # via jupyter-client\nrequests==2.22.0          # via detect-secrets, yarg\nsix==1.12.0               # via bleach, cfgv, jsonschema, packaging, pip-tools, pre-commit, prompt-toolkit, python-dateutil, traitlets\ntestpath==0.4.2           # via nbconvert\ntoml==0.10.0              # via pre-commit\ntoposort==1.5             # via pip-compile-multi\ntornado==6.0.3            # via ipykernel, jupyter-client\ntraitlets==4.3.2          # via ipykernel, ipython, jupyter-client, jupyter-core, nbconvert, nbformat\nurllib3==1.25.3           # via requests\nvirtualenv==16.7.2        # via pre-commit\nwcwidth==0.1.7            # via prompt-toolkit, pytest\nwebencodings==0.5.1       # via bleach\nyarg==0.1.9               # via pipreqs\nzipp==0.5.2               # via importlib-metadata, importlib-resources\n\n# The following packages are considered to be unsafe in a requirements file:\n# pip\n# setuptools\n"
  },
  {
    "path": "setup.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom setuptools import find_packages, setup\n\nCMD_NAME = \"bayesmark\"\n\n# Strings to remove from README to make it PyPI friendly. See:\n# https://packaging.python.org/guides/making-a-pypi-friendly-readme/#validating-restructuredtext-markup\nREMOVE_FROM_RST = (\":func:\", \":ref:\")\n\n\ndef read_requirements(name):\n    with open(\"requirements/\" + name + \".in\") as f:\n        requirements = f.read().strip()\n    requirements = requirements.replace(\"==\", \">=\").splitlines()  # Loosen strict pins\n    return [pp for pp in requirements if pp[0].isalnum()]\n\n\n# Derive install requires from base.in first order requirements\nrequirements = read_requirements(\"base\")\nopt_requirements = read_requirements(\"optimizers\")\nipynb_requirements = read_requirements(\"ipynb\")\n\nwith open(\"README.rst\") as f:\n    long_description = f.read()\n# Probably more efficient way to do this with regex but good enough\nfor remove_word in REMOVE_FROM_RST:\n    long_description = long_description.replace(remove_word, \"\")\n\nsetup(\n    name=\"bayesmark\",\n    version=\"0.0.8\",\n    packages=find_packages(),\n    url=\"https://github.com/uber/bayesmark/\",\n    author=\"Ryan Turner\",\n    author_email=(\"rdturnermtl@github.com\"),\n    license=\"Apache v2\",\n    description=\"Bayesian optimization benchmark system\",\n    install_requires=requirements,\n    
extras_require={\"optimizers\": opt_requirements, \"notebooks\": ipynb_requirements},\n    long_description=long_description,\n    long_description_content_type=\"text/x-rst\",\n    platforms=[\"any\"],\n    entry_points={\n        \"console_scripts\": [\n            CMD_NAME + \"-init = bayesmark.experiment_db_init:main\",\n            CMD_NAME + \"-launch = bayesmark.experiment_launcher:main\",\n            CMD_NAME + \"-agg = bayesmark.experiment_aggregate:main\",\n            CMD_NAME + \"-baseline = bayesmark.experiment_baseline:main\",\n            CMD_NAME + \"-anal = bayesmark.experiment_analysis:main\",\n            CMD_NAME + \"-exp = bayesmark.experiment:main\",\n        ]\n    },\n)\n"
  },
  {
    "path": "test/data_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom hypothesis import given\nfrom hypothesis.strategies import from_regex, sampled_from\n\nfrom bayesmark import data\n\nDATA_NAMES = sorted(data.DATA_LOADERS.keys())\n\n\n@given(sampled_from(DATA_NAMES) | from_regex(\"^reg-[A-Z]*\") | from_regex(\"^clf-[A-Z]*\"))\ndef test_get_problem_type(dataset_name):\n    problem_type = data.get_problem_type(dataset_name)\n    assert problem_type is not None\n"
  },
  {
    "path": "test/dummy.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport pytest\nimport pytest_cov\n\n# import extra deps and use it to keep pipreqs and flake8 happy\nfor pkg in (pytest, pytest_cov):\n    print(\"%s %s\" % (pkg.__name__, pkg.__version__))\n"
  },
  {
    "path": "test/expected_max_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom hypothesis import given\nfrom hypothesis.strategies import floats, integers, lists\n\nimport bayesmark.expected_max as em\n\n\n@given(integers(1, 10), integers(1, 10))\ndef test_get_expected_max_weights(n, m):\n    pdf = em.get_expected_max_weights(n, m)\n    assert pdf is not None\n\n\n@given(lists(floats()), integers(1, 10))\ndef test_expected_max(x, m):\n    E_max_x = em.expected_max(x, m)\n    assert E_max_x is not None\n\n\n@given(lists(floats()), integers(1, 10))\ndef test_expected_min(x, m):\n    E_min_x = em.expected_min(x, m)\n    assert E_min_x is not None\n"
  },
  {
    "path": "test/experiment_aggregate_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom itertools import product\n\nimport numpy as np\nfrom hypothesis import HealthCheck, given, settings\nfrom hypothesis.strategies import floats\nfrom hypothesis_gufunc.extra.xr import (\n    fixed_datasets,\n    simple_coords,\n    simple_dataarrays,\n    simple_datasets,\n    xr_coords,\n    xr_dims,\n)\n\nimport bayesmark.experiment_aggregate as agg\nfrom bayesmark.constants import EVAL_PHASE, ITER, METHOD, OBS_PHASE, SUGGEST, SUGGEST_PHASE, TEST_CASE, TRIAL\nfrom bayesmark.experiment import OBJECTIVE_NAMES\nfrom bayesmark.signatures import N_SUGGESTIONS\n\nN_SIG = N_SUGGESTIONS\nSIG_POINT = \"sig_point\"\n\n\ndef data_to_concat():\n    def separate(ds):\n        G = product(\n            ds.coords[TEST_CASE].values.tolist(), ds.coords[METHOD].values.tolist(), ds.coords[TRIAL].values.tolist()\n        )\n\n        L = []\n        for test_case, method, trial in G:\n            # Could swap out trial for UUID here\n            meta_data = (test_case, method, trial)\n\n            ds_sub = ds.sel({TEST_CASE: test_case, METHOD: method, TRIAL: trial}, drop=True)\n\n            perf_ds = ds_sub[list(OBJECTIVE_NAMES)]\n            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]\n            suggest_ds = ds_sub[[\"foo\", \"bar\", \"baz\"]]\n            sig = ds_sub[\"sig\"].values.tolist()\n            data = (perf_ds, time_ds, suggest_ds, sig)\n 
           L.append((meta_data, data))\n            assert not any(np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)\n            assert not any(np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)\n            assert not any(np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)\n            assert not np.any(np.isnan(sig))\n        return L\n\n    vars_to_dims = {\n        \"sig\": (SIG_POINT, TEST_CASE, METHOD, TRIAL),\n        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),\n        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),\n        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),\n    }\n    dtype = {SUGGEST_PHASE: np.float_, EVAL_PHASE: np.float_, OBS_PHASE: np.float_, \"sig\": np.float_}\n\n    for obj in OBJECTIVE_NAMES:\n        vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)\n        dtype[obj] = np.float_\n\n    # We should also generate this using the space strategy, but hard coding this test case is good enough got now.\n    input_vars = {\"foo\": np.float_, \"bar\": np.float_, \"baz\": np.int_}\n    for vv, dd in input_vars.items():\n        vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)\n        dtype[vv] = dd\n\n    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)\n    # Using on str following dim conventions for coords here\n    coords_st = {\n        ITER: simple_coords(min_side=1),\n        SUGGEST: simple_coords(min_side=1),\n        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),\n        METHOD: xr_coords(elements=xr_dims(), min_side=1),\n        TRIAL: simple_coords(min_side=1),\n        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),\n    }\n    S = fixed_datasets(vars_to_dims, dtype=dtype, elements=float_no_nan, coords_st=coords_st, min_side=1).map(separate)\n    return S\n\n\ndef time_datasets():\n    vars_to_dims = {SUGGEST_PHASE: (ITER,), EVAL_PHASE: (ITER, SUGGEST), OBS_PHASE: (ITER,)}\n    dtype = {SUGGEST_PHASE: np.float_, EVAL_PHASE: np.float_, 
OBS_PHASE: np.float_}\n    elements = floats(min_value=0, allow_infinity=False, allow_nan=False)\n    S = simple_datasets(vars_to_dims, dtype=dtype, elements=elements, min_side=1)\n    return S\n\n\ndef perf_dataarrays():\n    dims = (ITER, SUGGEST)\n    elements = floats(allow_nan=False)\n    S = simple_dataarrays(dims, dtype=np.float_, elements=elements)\n    return S\n\n\n@given(time_datasets())\ndef test_summarize_time(all_time):\n    time_summary = agg.summarize_time(all_time)\n    assert time_summary is not None\n\n\n@given(data_to_concat())\n@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))\ndef test_concat_experiments(all_experiments):\n    all_experiments = list(all_experiments)\n    all_perf, all_time, all_suggest, all_sigs = agg.concat_experiments(all_experiments, ravel=False)\n"
  },
  {
    "path": "test/experiment_analysis_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom hypothesis import given, settings\n\nimport bayesmark.experiment_baseline as base\nfrom bayesmark import experiment_analysis as anal\nfrom bayesmark.constants import TRIAL\nfrom bayesmark.np_util import argmin_2d\nfrom hypothesis_util import gufunc_floats\nfrom util import perf_dataarrays\n\n\n@given(gufunc_floats(\"(n,p,t),(n,p,t)->(n,t)\", allow_nan=False, unique=True, min_side=1))\ndef test_get_perf_array(args):\n    \"\"\"Behavior for tie-breaking in `evals_visible` is complex, so only testing all unique case here.\"\"\"\n    evals, evals_visible = args\n\n    n_iter, _, n_trials = evals.shape\n\n    perf_array = anal.get_perf_array(evals, evals_visible)\n    assert perf_array.shape == (n_iter, n_trials)\n\n    for ii in range(n_iter):\n        for jj in range(n_trials):\n            idx0, idx1 = argmin_2d(evals_visible[: ii + 1, :, jj])\n            assert perf_array[ii, jj] == evals[idx0, idx1, jj]\n\n\n@given(perf_dataarrays(min_trial=2))\n@settings(deadline=None)\ndef test_compute_aggregates(perf_da):\n    n_trial = perf_da.sizes[TRIAL]\n\n    split = n_trial // 2\n    assert isinstance(split, int)\n\n    perf_da1 = perf_da.isel({TRIAL: slice(None, split)})\n    assert perf_da1.sizes[TRIAL] >= 1\n\n    perf_da2 = perf_da.isel({TRIAL: slice(split, None)})\n    assert perf_da2.sizes[TRIAL] >= 1\n    perf_da2.coords[TRIAL] = 
list(range(perf_da2.sizes[TRIAL]))\n\n    baseline_ds = base.compute_baseline(perf_da1)\n    anal.compute_aggregates(perf_da2, baseline_ds)\n\n\n@given(perf_dataarrays(min_trial=4))\n@settings(deadline=None)\ndef test_compute_aggregates_with_aux(perf_da):\n    # Split to get baseline\n    n_trial = perf_da.sizes[TRIAL]\n    split = n_trial // 2\n    assert isinstance(split, int)\n    perf_da1 = perf_da.isel({TRIAL: slice(None, split)})\n    assert perf_da1.sizes[TRIAL] >= 1\n    perf_da2 = perf_da.isel({TRIAL: slice(split, None)})\n    assert perf_da2.sizes[TRIAL] >= 1\n    perf_da2.coords[TRIAL] = list(range(perf_da2.sizes[TRIAL]))\n    baseline_ds = base.compute_baseline(perf_da1)\n    perf_da = perf_da2\n\n    # Split to get visible\n    n_trial = perf_da.sizes[TRIAL]\n    split = n_trial // 2\n    assert isinstance(split, int)\n    perf_da1 = perf_da.isel({TRIAL: slice(None, split)})\n    assert perf_da1.sizes[TRIAL] >= 1\n    perf_da2 = perf_da.isel({TRIAL: slice(split, 2 * split)})\n    assert perf_da2.sizes[TRIAL] >= 1\n    perf_da2.coords[TRIAL] = list(range(perf_da2.sizes[TRIAL]))\n\n    anal.compute_aggregates(perf_da2, baseline_ds, visible_perf_da=perf_da1)\n"
  },
  {
    "path": "test/experiment_baseline_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport warnings\n\nfrom hypothesis import given, settings\n\nimport bayesmark.experiment_baseline as base\nfrom util import perf_dataarrays\n\n\n@given(perf_dataarrays())\n@settings(deadline=None)\ndef test_compute_baseline(perf_da):\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=RuntimeWarning)\n        baseline_ds = base.compute_baseline(perf_da)\n    assert baseline_ds is not None\n"
  },
  {
    "path": "test/experiment_db_init_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport bayesmark.experiment_db_init as dbi\n\n\ndef test_main():\n    # Really a nop test since there is nothing to test in this func\n    assert dbi.EXIST_OK\n"
  },
  {
    "path": "test/experiment_launcher_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport os\nimport warnings\nfrom io import StringIO\nfrom string import ascii_letters, digits\n\nimport numpy as np\nfrom hypothesis import HealthCheck, assume, given, settings\nfrom hypothesis.strategies import (\n    booleans,\n    fixed_dictionaries,\n    from_regex,\n    integers,\n    lists,\n    sampled_from,\n    text,\n    tuples,\n    uuids,\n)\nfrom pathvalidate.argparse import validate_filename, validate_filepath\n\nfrom bayesmark import data\nfrom bayesmark import experiment_launcher as launcher\nfrom bayesmark.builtin_opt.config import CONFIG\nfrom bayesmark.cmd_parse import CmdArgs\nfrom bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES\nfrom hypothesis_util import seeds\n\nDATA_NAMES = sorted(data.DATA_LOADERS.keys())\n\n\ndef filepaths():\n    def valid(ss):\n        try:\n            validate_filepath(ss)\n        except Exception:\n            return False\n        return True\n\n    alphabet = ascii_letters + digits + \"_.-~\" + os.sep\n    S = text(alphabet=alphabet, min_size=1).map(lambda ss: os.sep + ss).filter(valid)\n    return S\n\n\ndef filenames(suffix=\"\"):\n    def valid(ss):\n        try:\n            validate_filename(ss)\n        except Exception:\n            return False\n        return True\n\n    alphabet = ascii_letters + digits + \"_.-~\"\n    S = text(alphabet=alphabet, min_size=1).map(lambda ss: ss + 
suffix).filter(valid)\n    return S\n\n\ndef joinables():\n    S = filenames().filter(lambda ss: ARG_DELIM not in ss)\n    return S\n\n\ndef datasets():\n    return sampled_from(DATA_NAMES) | from_regex(\"^reg-[A-Z]*$\") | from_regex(\"^clf-[A-Z]*$\")\n\n\ndef launcher_args(opts, min_jobs=0):\n    args_dict = {\n        CmdArgs.db_root: filepaths(),\n        CmdArgs.optimizer_root: filepaths(),\n        CmdArgs.uuid: uuids(),\n        CmdArgs.data_root: filepaths(),\n        CmdArgs.db: filenames(),\n        CmdArgs.optimizer: lists(sampled_from(opts), min_size=1, max_size=len(opts)),\n        CmdArgs.data: lists(datasets(), min_size=1),\n        CmdArgs.classifier: lists(sampled_from(MODEL_NAMES), min_size=1, max_size=len(MODEL_NAMES)),\n        CmdArgs.metric: lists(sampled_from(METRICS), min_size=1, max_size=len(METRICS)),\n        CmdArgs.n_calls: integers(1, 100),\n        CmdArgs.n_suggest: integers(1, 100),\n        CmdArgs.n_repeat: integers(1, 100),\n        CmdArgs.n_jobs: integers(min_jobs, 1000),\n        CmdArgs.jobs_file: filepaths(),\n        CmdArgs.verbose: booleans(),\n    }\n    S = fixed_dictionaries(args_dict)\n    return S\n\n\ndef launcher_args_and_config(min_jobs=0):\n    def args_and_config(opts):\n        args = launcher_args(opts, min_jobs=min_jobs)\n        configs = fixed_dictionaries({ss: filenames(suffix=\".py\") for ss in opts})\n        args_and_configs = tuples(args, configs)\n        return args_and_configs\n\n    # Make opt names a mix of built in opts and arbitrary names\n    optimizers = lists(joinables() | sampled_from(sorted(CONFIG.keys())), min_size=1)\n    S = optimizers.flatmap(args_and_config)\n    return S\n\n\ndef test_is_arg_safe_empty():\n    val = launcher._is_arg_safe(\"\")\n    assert isinstance(val, bool)\n    assert not val\n\n\n@given(launcher_args_and_config(), uuids())\n@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))\ndef test_gen_commands(args, run_uuid):\n    args, opt_file_lookup = 
args\n\n    assume(all(launcher._is_arg_safe(ss) for ss in args.values() if isinstance(ss, str)))\n\n    uniqify = [CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric]\n    for uu in uniqify:\n        assume(all(launcher._is_arg_safe(ss) for ss in args[uu]))\n        args[uu] = list(set(args[uu]))\n\n    m_set = set(args[CmdArgs.metric])\n    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in data.METRICS_LOOKUP.items()}\n    ok = all(len(m_lookup[data.get_problem_type(dd)]) > 0 for dd in args[CmdArgs.data])\n    assume(ok)\n\n    G = launcher.gen_commands(args, opt_file_lookup, run_uuid)\n    L = list(G)\n    assert L is not None\n\n\n@given(launcher_args_and_config(min_jobs=1), uuids(), seeds())\n@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))\ndef test_dry_run(args, run_uuid, seed):\n    args, opt_file_lookup = args\n\n    assume(all(launcher._is_arg_safe(ss) for ss in args.values() if isinstance(ss, str)))\n\n    uniqify = [CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric]\n    for uu in uniqify:\n        assume(all(launcher._is_arg_safe(ss) for ss in args[uu]))\n        args[uu] = list(set(args[uu]))\n\n    m_set = set(args[CmdArgs.metric])\n    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in data.METRICS_LOOKUP.items()}\n    ok = all(len(m_lookup[data.get_problem_type(dd)]) > 0 for dd in args[CmdArgs.data])\n    assume(ok)\n\n    fp_buf = StringIO()\n    random = np.random.RandomState(seed)\n\n    with warnings.catch_warnings():\n        warnings.simplefilter(\"ignore\", category=UserWarning)\n        launcher.dry_run(args, opt_file_lookup, run_uuid, fp_buf, random=random)\n\n    jobs = fp_buf.getvalue()\n    assert jobs is not None\n"
  },
  {
    "path": "test/experiment_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport inspect\nimport os.path\n\nimport numpy as np\nfrom hypothesis import assume, given, settings\nfrom hypothesis.strategies import floats, integers, sampled_from, text\nfrom hypothesis_gufunc.extra.xr import simple_datasets\nfrom hypothesis_gufunc.gufunc import gufunc_args\n\nimport bayesmark.experiment as exp\nimport bayesmark.random_search as rs\nfrom bayesmark import data, np_util\nfrom bayesmark.abstract_optimizer import AbstractOptimizer\nfrom bayesmark.builtin_opt.config import CONFIG\nfrom bayesmark.constants import DATA_LOADER_NAMES, ITER, METRICS, MODEL_NAMES, SUGGEST\nfrom bayesmark.sklearn_funcs import SklearnModel, TestFunction\nfrom hypothesis_util import seeds\nfrom util import space_configs\n\n\nclass RandomOptimizer(AbstractOptimizer):\n    # Unclear what is best package to list for primary_import here.\n    primary_import = \"bayesmark\"\n\n    def __init__(self, api_config, random=np_util.random, flaky=False):\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n        self.flaky = flaky\n\n    def suggest(self, n_suggestions=1):\n        if self.flaky:\n            assert self.random.rand() <= 0.5\n        x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n        return x_guess\n\n    def observe(self, X, y):\n        # Random search so don't do 
anything for observe\n        if self.flaky:\n            assert self.random.rand() <= 0.5\n\n\nclass OutOfBoundsOptimizer(AbstractOptimizer):\n    def __init__(self, api_config, random=np_util.random):\n        AbstractOptimizer.__init__(self, api_config)\n        self.random = random\n        self.param_list = sorted([kk for kk in api_config.keys() if api_config[kk][\"type\"] in (\"real\", \"int\")])\n\n    def suggest(self, n_suggestions=1):\n        x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)\n\n        ii = self.random.randint(0, n_suggestions)\n        pp = self.random.choice(self.param_list)\n\n        if self.api_config[pp][\"type\"] == \"real\":\n            eps = self.random.rand()\n        else:\n            eps = self.random.randint(1, 10)\n\n        if self.random.rand() <= 0.5:\n            x_guess[ii][pp] = self.api_config[pp][\"range\"][0] - eps\n        else:\n            x_guess[ii][pp] = self.api_config[pp][\"range\"][1] + eps\n        return x_guess\n\n    def observe(self, X, y):\n        pass\n\n\nclass FlakyProblem(TestFunction):\n    def __init__(self, api_config, random):\n        TestFunction.__init__(self)\n        self.api_config = api_config\n        self.random = random\n\n    def evaluate(self, params):\n        assert self.random.rand() <= 0.5\n        return [0.0]\n\n\n@given(\n    sampled_from(MODEL_NAMES),\n    sampled_from(DATA_LOADER_NAMES),\n    sampled_from(METRICS),\n    integers(0, 5),\n    integers(1, 3),\n    seeds(),\n)\n@settings(max_examples=10, deadline=None)\ndef test_run_study(model_name, dataset, scorer, n_calls, n_suggestions, seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    function_instance = SklearnModel(model_name, dataset, scorer)\n    optimizer = RandomOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))\n    optimizer.get_version()\n    
exp.run_study(optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names))\n\n\n@given(\n    sampled_from(MODEL_NAMES),\n    sampled_from(DATA_LOADER_NAMES),\n    sampled_from(METRICS),\n    integers(1, 5),\n    integers(1, 3),\n    seeds(),\n)\ndef test_run_study_bounds_fail(model_name, dataset, scorer, n_calls, n_suggestions, seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    function_instance = SklearnModel(model_name, dataset, scorer)\n    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))\n    optimizer.get_version()\n\n    # pytest have some assert failed tools we could use instead, but this is ok for now\n    bounds_fails = False\n    try:\n        exp.run_study(\n            optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names)\n        )\n    except Exception as e:\n        bounds_fails = str(e) == \"Optimizer suggestion is out of range.\"\n    assert bounds_fails\n\n\n@given(\n    sampled_from(MODEL_NAMES),\n    sampled_from(DATA_LOADER_NAMES),\n    sampled_from(METRICS),\n    integers(0, 5),\n    integers(1, 3),\n    seeds(),\n)\n@settings(max_examples=10, deadline=None)\ndef test_run_study_callback(model_name, dataset, scorer, n_calls, n_suggestions, seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    function_instance = SklearnModel(model_name, dataset, scorer)\n    optimizer = RandomOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))\n    optimizer.get_version()\n    n_obj = len(function_instance.objective_names)\n\n    function_evals_cmin = np.zeros((n_calls, n_obj), dtype=float)\n    iters_list = []\n\n    def callback(f_min, iters):\n        assert f_min.shape == (n_obj,)\n\n        iters_list.append(iters)\n        if iters == 0:\n            assert 
np.all(f_min == np.inf)\n            return\n\n        function_evals_cmin[iters - 1, :] = f_min\n\n    function_evals, _, _ = exp.run_study(\n        optimizer, function_instance, n_calls, n_suggestions, n_obj=n_obj, callback=callback\n    )\n\n    assert iters_list == list(range(n_calls + 1))\n\n    for ii in range(n_obj):\n        for jj in range(n_calls):\n            idx0, idx1 = np_util.argmin_2d(function_evals[: jj + 1, :, 0])\n            assert function_evals_cmin[jj, ii] == function_evals[idx0, idx1, ii]\n\n\n@given(space_configs(allow_missing=True), integers(0, 5), integers(1, 3), seeds(), seeds())\n@settings(deadline=None)\ndef test_run_study_flaky(api_config, n_calls, n_suggestions, seed1, seed2):\n    api_config, _, _, _ = api_config\n\n    function_instance = FlakyProblem(api_config=api_config, random=np.random.RandomState(seed1))\n    optimizer = RandomOptimizer(api_config, random=np.random.RandomState(seed2), flaky=True)\n    optimizer.get_version()\n    exp.run_study(optimizer, function_instance, n_calls, n_suggestions)\n\n\n@given(\n    space_configs(allow_missing=True),\n    sampled_from(MODEL_NAMES),\n    sampled_from(DATA_LOADER_NAMES),\n    sampled_from(METRICS),\n    integers(0, 5),\n    integers(1, 3),\n    seeds(),\n)\n@settings(max_examples=10, deadline=None)\ndef test_run_sklearn_study(api_config, model_name, dataset, scorer, n_calls, n_suggestions, seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    random = np.random.RandomState(seed)\n    exp.run_sklearn_study(RandomOptimizer, {\"random\": random}, model_name, dataset, scorer, n_calls, n_suggestions)\n\n\n@given(\n    space_configs(allow_missing=True),\n    sampled_from(MODEL_NAMES),\n    sampled_from(DATA_LOADER_NAMES),\n    sampled_from(METRICS),\n    integers(0, 5),\n    integers(1, 3),\n)\n@settings(max_examples=10, deadline=None)\ndef test_run_sklearn_study_real(api_config, model_name, dataset, scorer, n_calls, 
n_suggestions):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    # Should really do parametric test but for loop good enough\n    for opt_name in sorted(CONFIG.keys()):\n        opt_class = exp._get_opt_class(opt_name)\n        # opt_root=None should work with built-in opt\n        opt_kwargs = exp.load_optimizer_kwargs(opt_name, opt_root=None)\n\n        exp.run_sklearn_study(opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions)\n\n\n@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS))\n@settings(deadline=None)\ndef test_get_objective_signature(model_name, dataset, scorer):\n    prob_type = data.get_problem_type(dataset)\n    assume(scorer in data.METRICS_LOOKUP[prob_type])\n\n    exp.get_objective_signature(model_name, dataset, scorer)\n\n\n@given(gufunc_args(\"(n,m,k),(k)->()\", dtype=[np.float_, str], elements=[floats(), text()], unique=[False, True]))\ndef test_build_eval_ds(args):\n    function_evals, objective_names = args\n    exp.build_eval_ds(function_evals, objective_names)\n\n\n@given(gufunc_args(\"(n),(n,m),(n)->()\", dtype=np.float_, elements=floats(min_value=0, max_value=1e6)))\ndef test_build_timing_ds(args):\n    suggest_time, eval_time, observe_time = args\n    exp.build_timing_ds(suggest_time, eval_time, observe_time)\n\n\n@given(\n    simple_datasets(\n        {\"int\": (ITER, SUGGEST), \"real\": (ITER, SUGGEST), \"binary\": (ITER, SUGGEST), \"cat\": (ITER, SUGGEST)},\n        dtype={\"int\": int, \"real\": float, \"binary\": bool, \"cat\": str},\n        min_side=1,\n    )\n)\ndef test_build_suggest_ds(suggest_ds):\n    ds_vars = list(suggest_ds)\n\n    n_call, n_suggest = suggest_ds[ds_vars[0]].values.shape\n    suggest_log = np.zeros((n_call, n_suggest), dtype=object)\n    for ii in range(n_call):\n        for jj in range(n_suggest):\n            suggest_log[ii, jj] = {}\n            for kk in ds_vars:\n                
suggest_log[ii, jj][kk] = suggest_ds[kk].sel({ITER: ii, SUGGEST: jj}, drop=True).values.item()\n    suggest_log = suggest_log.tolist()\n\n    suggest_ds_2 = exp.build_suggest_ds(suggest_log)\n\n    assert suggest_ds.equals(suggest_ds_2)\n\n\ndef test_get_opt_class_module():\n    # Should really do parametric test but for loop good enough\n    for opt_name in sorted(CONFIG.keys()):\n        opt_class = exp._get_opt_class(opt_name)\n\n        fname = inspect.getfile(opt_class)\n        fname = os.path.basename(fname)\n\n        wrapper_file, _ = CONFIG[opt_name]\n\n        assert fname == wrapper_file\n"
  },
  {
    "path": "test/hypothesis_util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hypothesis import given\nfrom hypothesis.strategies import floats, integers, just, tuples\nfrom hypothesis_gufunc.gufunc import gufunc_args\n\n\ndef identity(x):\n    \"\"\"When one needs a default mapping that does nothing.\"\"\"\n    return x\n\n\ndef seeds():\n    return integers(min_value=0, max_value=(2 ** 32) - 1)\n\n\ndef probs():\n    return floats(min_value=1e-3, max_value=1 - 1e-3)\n\n\ndef mfloats():\n    return floats(min_value=-1e3, max_value=1e3)\n\n\ndef gufunc_floats(signature, min_side=0, max_side=5, unique=False, **kwargs):\n    elements = floats(**kwargs)\n    S = gufunc_args(signature, dtype=np.float_, elements=elements, unique=unique, min_side=min_side, max_side=max_side)\n    return S\n\n\ndef close_enough(x, y, equal_nan=False, rtol=1e-5, atol=1e-8):\n    # Might want to adjust rtol and atol for lower precision floats\n    x, y = np.asarray(x), np.asarray(y)\n\n    if x.shape != y.shape:\n        return False\n\n    if x.dtype != y.dtype:\n        return False\n\n    if x.dtype.kind == \"f\":\n        assert y.dtype.kind == \"f\"\n        # Note: equal_nan only considered in both float case!\n        return np.allclose(x, y, equal_nan=equal_nan, rtol=rtol, atol=atol)\n\n    return np.all(x == y)\n\n\ndef broadcasted(\n    f, signature, itypes, otypes, elements, unique=False, excluded=(), min_side=0, 
max_side=5, max_dims_extra=2\n):\n    \"\"\"Strategy that makes it easy to test the broadcasting semantics of a\n    function against the 'ground-truth' broadcasting convention provided by\n    :obj:`numpy.vectorize`.\n\n    Parameters\n    ----------\n    f : callable\n        This is the original function that handles broadcasting itself. It must\n        return an `ndarray` or multiple `ndarray` (which Python treats as a\n        `tuple`) if returning 2-or-more output arguments.\n    signature : str\n        Signature for shapes to be compatible with. Expects string in format\n        of numpy generalized universal function signature, e.g.,\n        `'(m,n),(n)->(m)'` for vectorized matrix-vector multiplication.\n        Officially, only supporting ascii characters.\n    itypes : list-like of dtype\n        List of numpy `dtype` for each argument. These can be either strings\n        (``'int64'``), type (``np.int64``), or numpy `dtype`\n        (``np.dtype('int64')``). A single `dtype` can be supplied for all\n        arguments.\n    otypes : list of dtype\n        The dtype for the outputs of `f`. It must be a list with one dtype\n        for each output argument of `f`. It must be a singleton list if `f`\n        only returns a single output. It can also be set to `None` to leave it\n        to be inferred, but this can create issues with empty arrays, so it is\n        not officially supported here.\n    elements : list-like of strategy\n        Strategies to fill in array elements on a per argument basis. 
One can\n        also specify a single strategy\n        (e.g., :func:`hypothesis.strategies.floats`)\n        and have it applied to all arguments.\n    unique : list-like of bool\n        Boolean flag to specify if all elements in an array must be unique.\n        One can also specify a single boolean to apply it to all arguments.\n    excluded : list-like of integers\n        Set of integers representing the positional arguments for which the function will\n        not be vectorized. Uses same format as :obj:`numpy.vectorize`.\n    min_side : int or dict\n        Minimum size of any side of the arrays. It is good to test the corner\n        cases of 0 or 1 sized dimensions when applicable, but if not, a min\n        size can be supplied here. Minimums can be provided on a per-dimension\n        basis using a dict, e.g. ``min_side={'n': 2}``. One can use, e.g.,\n        ``min_side={hypothesis.extra.gufunc.BCAST_DIM: 2}`` to limit the size\n        of the broadcasted dimensions.\n    max_side : int or dict\n        Maximum size of any side of the arrays. This can usually be kept small\n        and still find most corner cases in testing. Dictionaries can be\n        supplied as with `min_side`.\n    max_dims_extra : int\n        Maximum number of extra dimensions that can be appended on left of\n        arrays for broadcasting. This should be kept small as the memory used\n        grows exponentially with extra dimensions.\n\n    Returns\n    -------\n    f : callable\n        This is the original function that handles broadcasting itself.\n    f_vec : callable\n        Function that should be functionally equivalent to `f` but broadcasting\n        is handled by :obj:`numpy.vectorize`.\n    res : tuple of ndarrays\n        Resulting ndarrays with shapes consistent with `signature`. Extra\n        dimensions for broadcasting will be present.\n\n    Examples\n    --------\n\n    .. 
code-block:: pycon\n\n      >>> import numpy as np\n      >>> from hypothesis.strategies import integers, booleans\n      >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'],\n                      elements=[integers(0,9), booleans()],\n                      unique=[True, False]).example()\n      (<ufunc 'add'>,\n       <numpy.lib.function_base.vectorize at 0x11a777690>,\n       (array([5, 6]), array([ True], dtype=bool)))\n      >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'],\n                      elements=[integers(0,9), booleans()],\n                      excluded=(1,)).example()\n      (<ufunc 'add'>,\n       <numpy.lib.function_base.vectorize at 0x11a715b10>,\n       (array([9]), array(True, dtype=bool)))\n      >>> f, fv, args = broadcasted(np.add, '(),()->()', ['int64'],\n                                    ['int64', 'bool'],\n                                    elements=[integers(0,9), booleans()],\n                                    min_side=1, max_side=3,\n                                    max_dims_extra=1).example()\n      >>> f is np.add\n      True\n      >>> f(*args)\n      7\n      >>> fv(*args)\n      array(7)\n    \"\"\"\n    # cache and doc not needed for property testing, excluded not actually\n    # needed here because we don't generate extra dims for the excluded args.\n    # Using the excluded argument in np.vectorize only seems to confuse it in\n    # corner cases.\n    f_vec = np.vectorize(f, signature=signature, otypes=otypes)\n\n    broadcasted_args = gufunc_args(\n        signature,\n        itypes,\n        elements,\n        unique=unique,\n        excluded=excluded,\n        min_side=min_side,\n        max_side=max_side,\n        max_dims_extra=max_dims_extra,\n    )\n    funcs_and_args = tuples(just(f), just(f_vec), broadcasted_args)\n    return funcs_and_args\n\n\ndef broadcast_tester(\n    f,\n    signature,\n    otype,\n    excluded=(),\n    dtype=np.float_,\n    elements=None,\n    
unique=False,\n    map_=identity,\n    min_side=0,\n    max_side=5,\n    max_dims_extra=2,\n    **kwargs,  # This still confuses flake8\n):\n    # Build the test for broadcasting with random dimensions\n    elements = floats(**kwargs) if elements is None else elements\n\n    @given(\n        broadcasted(\n            f,\n            signature,\n            otypes=[otype],\n            excluded=excluded,\n            itypes=dtype,\n            elements=elements,\n            unique=unique,\n            min_side=min_side,\n            max_side=max_side,\n            max_dims_extra=max_dims_extra,\n        )\n    )\n    def test_f(bargs):\n        f0, f_vec, args = bargs\n        args = map_(args)\n\n        R1 = f0(*args)\n        R2 = f_vec(*args)\n\n        kind = np.dtype(otype).kind\n        if kind in \"US\":  # Same kind ok for str and unicode dtypes\n            assert R1.dtype.kind == kind\n            assert R2.dtype.kind == kind\n        elif otype is not None:\n            assert R1.dtype == otype\n            assert R2.dtype == otype\n        assert close_enough(R1, R2, equal_nan=True)\n\n    # Call the test\n    test_f()\n\n\ndef multi_broadcast_tester(\n    f,\n    signature,\n    otypes,\n    excluded=(),\n    dtype=np.float_,\n    elements=None,\n    unique=False,\n    map_=identity,\n    min_side=0,\n    max_side=5,\n    max_dims_extra=2,\n    **kwargs,\n):\n    elements = floats(**kwargs) if elements is None else elements\n\n    @given(\n        broadcasted(\n            f,\n            signature,\n            otypes=otypes,\n            excluded=excluded,\n            itypes=dtype,\n            elements=elements,\n            unique=unique,\n            min_side=min_side,\n            max_side=max_side,\n            max_dims_extra=max_dims_extra,\n        )\n    )\n    def test_f(bargs):\n        f0, f_vec, args = bargs\n        args = map_(args)\n\n        R1 = f0(*args)\n        R2 = f_vec(*args)\n        for rr1, rr2, ot in zip(R1, R2, 
otypes):\n            kind = np.dtype(ot).kind\n            if kind in \"US\":  # Same kind ok for str and unicode dtypes\n                assert R1.dtype.kind == kind\n                assert R2.dtype.kind == kind\n            else:\n                assert rr1.dtype == ot\n                assert rr2.dtype == ot\n            assert close_enough(rr1, rr2, equal_nan=True)\n\n    # Call the test\n    test_f()\n"
  },
  {
    "path": "test/np_util_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hypothesis import assume, given\nfrom hypothesis.strategies import floats, integers, lists\n\nfrom bayesmark import np_util\nfrom hypothesis_util import broadcast_tester, close_enough, gufunc_floats, seeds\n\n\n@given(seeds())\ndef test_random_seed(seed):\n    random = np.random.RandomState(seed)\n    seed = np_util.random_seed(random)\n\n\n@given(lists(lists(floats())), seeds())\ndef test_shuffle_2d(X, seed):\n    random = np.random.RandomState(seed)\n    np_util.shuffle_2d(X, random)\n\n\n@given(gufunc_floats(\"(n,m)->()\"), integers(1, 5), seeds())\ndef test_strat_split(X, n_splits, seed):\n    X, = X\n\n    random = np.random.RandomState(seed)\n    np_util.strat_split(X, n_splits, inplace=False, random=random)\n\n    random = np.random.RandomState(seed)\n    np_util.strat_split(X, n_splits, inplace=True, random=random)\n\n\n@given(gufunc_floats(\"(),()->()\", allow_nan=False))\ndef test_isclose_lte_pass(args):\n    x, y = args\n    x = np.minimum(x, y + 1e-10)\n    assert np_util.isclose_lte(x, y)\n\n\n@given(gufunc_floats(\"(),()->()\", allow_nan=False))\ndef test_isclose_lte_fail(args):\n    x, y = args\n    fac = 1e20\n    if np.ndim(x) == 0:\n        y = np.nan_to_num(y)\n        x = y + fac * np.spacing(np.abs(y)) + 1\n    else:\n        y.flat[0] = np.nan_to_num(y.flat[0])\n        x.flat[0] = y.flat[0] + fac * 
np.spacing(np.abs(y.flat[0])) + 1\n    assert not np_util.isclose_lte(x, y)\n\n\ndef test_isclose_broadcast():\n    broadcast_tester(np_util.isclose_lte, \"(),()->()\", otype=\"bool\", min_value=-1000, max_value=1000)\n\n\n@given(gufunc_floats(\"(),(),()->()\", allow_nan=False))\ndef test_clip_chk_pass(args):\n    x, lb, ub = args\n\n    assume(lb <= ub)\n\n    x = np.clip(x, lb - 1e-10, ub + 1e-10)\n    x_clip = np_util.clip_chk(x, lb=lb, ub=ub)\n    assert np.all(x_clip == np.clip(x_clip, lb, ub))\n\n\n@given(gufunc_floats(\"(),(),()->()\", allow_nan=True))\ndef test_clip_chk_pass_nan(args):\n    x, lb, ub = args\n\n    assume(lb <= ub)\n\n    x = np.clip(x, lb - 1e-10, ub + 1e-10)\n    x_clip = np_util.clip_chk(x, lb=lb, ub=ub, allow_nan=True)\n    assert np.all((np.isnan(x) & np.isnan(x_clip)) | (x_clip == np.clip(x_clip, lb, ub)))\n\n\n@given(gufunc_floats(\"(n),()->()\", allow_nan=False))\ndef test_snap_to_pass(args):\n    x, val = args\n\n    x = np.clip(x, val - 1e-10, val + 1e-10)\n    x_snap = np_util.snap_to(x, val)\n    assert np.all(x_snap == val)\n\n\n@given(gufunc_floats(\"(),(),(),()->()\", min_value=-1000, max_value=1000))\ndef test_linear_rescale_bounds(args):\n    lb0, ub0, lb1, ub1 = args\n\n    # Use sorted because hypothesis doesn't like using assume too often\n    lb0, ub0 = sorted([lb0, ub0])\n    lb1, ub1 = sorted([lb1, ub1])\n\n    assume(lb0 < ub0)\n    assume(lb1 <= ub1)\n\n    lb1_ = np_util.linear_rescale(lb0, lb0, ub0, lb1, ub1)\n    assert close_enough(lb1, lb1_)\n\n    ub1_ = np_util.linear_rescale(ub0, lb0, ub0, lb1, ub1)\n    assert close_enough(ub1, ub1_)\n\n\n@given(gufunc_floats(\"(),(),(),(),()->()\", min_value=-1000, max_value=1000))\ndef test_linear_rescale_inner(args):\n    X, lb0, ub0, lb1, ub1 = args\n\n    # Use sorted because hypothesis doesn't like using assume too often\n    lb0, ub0 = sorted([lb0, ub0])\n    lb1, ub1 = sorted([lb1, ub1])\n\n    assume(lb0 < ub0)\n    assume(lb1 <= ub1)\n\n    X = np.clip(X, lb0, 
ub0)\n\n    X = np_util.linear_rescale(X, lb0, ub0, lb1, ub1)\n\n    assert np.all(X <= ub1)\n    assert np.all(lb1 <= X)\n\n\n@given(gufunc_floats(\"(),(),(),(),(),()->()\", min_value=-1000, max_value=1000))\ndef test_linear_rescale_inverse(args):\n    X, lb0, ub0, lb1, ub1, enforce_bounds = args\n    enforce_bounds = enforce_bounds >= 0\n\n    # Use sorted because hypothesis doesn't like using assume too often\n    lb0, ub0 = sorted([lb0, ub0])\n    lb1, ub1 = sorted([lb1, ub1])\n\n    assume(lb0 < ub0)\n    assume(lb1 < ub1)\n    # Can't expect numerics to work well in these extreme cases:\n    assume((ub0 - lb0) < 1e3 * (ub1 - lb1))\n\n    if enforce_bounds:\n        X = np.clip(X, lb0, ub0)\n\n    X_ = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=enforce_bounds)\n    X_ = np_util.linear_rescale(X_, lb1, ub1, lb0, ub0, enforce_bounds=enforce_bounds)\n\n    assert close_enough(X_, X)\n\n\n@given(gufunc_floats(\"(),(),(),(),()->()\", min_value=-1000, max_value=1000))\ndef test_linear_rescale_bound_modes(args):\n    X, lb0, ub0, lb1, ub1 = args\n\n    # Use sorted because hypothesis doesn't like using assume too often\n    lb0, ub0 = sorted([lb0, ub0])\n    lb1, ub1 = sorted([lb1, ub1])\n\n    assume(lb0 < ub0)\n    assume(lb1 <= ub1)\n\n    X = np.clip(X, lb0, ub0)\n\n    Y1 = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=False)\n    Y2 = np_util.linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True)\n\n    assert close_enough(Y1, Y2)\n\n\ndef pair_sort(X, Y):\n    X, Y = np.broadcast_arrays(X, Y)\n    Z = [X, Y]\n    Z = np.sort(Z, axis=0)\n    X, Y = Z\n    return X, Y\n\n\ndef test_linear_rescale_broadcast():\n    def clean_up(args):\n        X, lb0, ub0, lb1, ub1, enforce_bounds = args\n\n        enforce_bounds = enforce_bounds >= 0\n\n        # Ideally, hypothesis should be able to handle constraints like this\n        lb0, ub0 = pair_sort(lb0, ub0)\n        lb1, ub1 = pair_sort(lb1, ub1)\n\n        assume(np.all(lb0 < 
ub0))\n        assume(np.all(lb1 <= ub1))\n\n        if enforce_bounds:\n            X = np.clip(X, lb0, ub0)\n\n        return X, lb0, ub0, lb1, ub1, enforce_bounds\n\n    broadcast_tester(\n        np_util.linear_rescale,\n        \"(),(),(),(),(),()->()\",\n        \"float64\",\n        excluded=(5,),\n        map_=clean_up,\n        min_value=-1000,\n        max_value=1000,\n    )\n\n\n@given(floats(), floats())\ndef test_isclose(x, y):\n    \"\"\"Test numpy version new enough to avoid broadcasting bug in `np.isclose`.\n\n    See numpy issue 'inconsistency in np.isclose #7014'. We could bump up numpy requirement version and\n    eliminate this wrapper.\n\n    See:\n    https://github.com/numpy/numpy/issues/7014\n    \"\"\"\n    z = np.isclose(x, y)\n    assert type(z) == np.bool_\n    assert z == np.squeeze(z)\n    assert np.isscalar(z)\n    assert np.shape(z) == ()\n\n    z = np.isclose(np.asarray(x), y)\n    assert type(z) == np.bool_\n    assert z == np.squeeze(z)\n    assert np.isscalar(z)\n    assert np.shape(z) == ()\n\n    z = np.isclose(x, np.asarray(y))\n    assert type(z) == np.bool_\n    assert z == np.squeeze(z)\n    assert np.isscalar(z)\n    assert np.shape(z) == ()\n\n    z = np.isclose(np.asarray(x), np.asarray(y))\n    assert type(z) == np.bool_\n    assert z == np.squeeze(z)\n    assert np.isscalar(z)\n    assert np.shape(z) == ()\n\n\ndef test_isclose_2():\n    \"\"\"Make sure we are running numpy version where numpy issue 'inconsistency in np.isclose #7014' has been fixed.\n    \"\"\"\n    y = np.isclose(0, 1)\n\n    assert np.ndim(y) == 0\n    assert np.isscalar(y)\n\n\n@given(gufunc_floats(\"(n,m)->(2)\", allow_nan=False, min_side=1))\ndef test_argmin_2d_no_nan(args):\n    X, = args\n\n    idx0, idx1 = np_util.argmin_2d(X)\n    assert X[idx0, idx1] <= np.min(X)\n\n\n@given(gufunc_floats(\"(n,m)->(2)\", allow_nan=True, min_side=1))\ndef test_argmin_2d_nan(args):\n    X, = args\n\n    idx0, idx1 = np_util.argmin_2d(X)\n    assert 
np.isnan(X[idx0, idx1]) == np.any(np.isnan(X))\n\n\n@given(gufunc_floats(\"(n,m),(n,m)->(n,m)\", allow_nan=False))\ndef test_cummin(args):\n    x_val, x_key = args\n\n    n, m = x_val.shape\n\n    c_min = np_util.cummin(x_val, x_key)\n    assert c_min.shape == (n, m)\n\n    for ii in range(n):\n        for jj in range(m):\n            last_min = np.where(x_key[: ii + 1, jj] == x_key[: ii + 1, jj].min())[0][-1]\n            assert x_key[last_min, jj] <= np.min(x_key[: ii + 1, jj])\n\n            assert c_min[ii, jj] == x_val[last_min, jj]\n\n\n@given(gufunc_floats(\"(n,m),(n,m)->(n,m)\", allow_nan=True))\ndef test_cummin_nan(args):\n    x_val, x_key = args\n\n    n, m = x_val.shape\n\n    x_key = np.nan_to_num(x_key)\n\n    c_min = np_util.cummin(x_val, x_key)\n    assert c_min.shape == (n, m)\n\n    for ii in range(n):\n        for jj in range(m):\n            last_min = np.where(x_key[: ii + 1, jj] == x_key[: ii + 1, jj].min())[0][-1]\n            assert x_key[last_min, jj] <= np.min(x_key[: ii + 1, jj])\n\n            if np.isnan(c_min[ii, jj]):\n                assert np.isnan(x_val[last_min, jj])\n            else:\n                assert c_min[ii, jj] == x_val[last_min, jj]\n"
  },
  {
    "path": "test/quantiles_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nimport scipy.stats as ss\nfrom hypothesis import assume, given\nfrom hypothesis.strategies import floats, integers\nfrom hypothesis_gufunc.gufunc import gufunc_args as gufunc\n\nimport bayesmark.quantiles as qt\nfrom hypothesis_util import broadcast_tester, gufunc_floats, multi_broadcast_tester\n\n# We could use nextafter to get closer to limits, but still creates numerics\n# issues.\nABOVE0 = 1e-6\nBELOW1 = 1 - 1e-6\nGLOBAL_FPR = 0.05\n\n# Will need to generalize these out to limits before upstreaming:\nfloats_ = floats(allow_infinity=False, allow_nan=False)\ncounts = integers(1, 1000)\nprobs = floats(ABOVE0, BELOW1)\n\n\ndef order_stats_trim(X):\n    Y = qt.order_stats(X)\n\n    X_ss = np.array(X.shape)\n    X_ss[-1] = X_ss[-1] + 2\n    assert Y.shape == tuple(X_ss)\n    assert np.all(Y[..., 0] == -np.inf)\n    assert np.all(Y[..., -1] == np.inf)\n\n    Y = Y[..., 1:-1]  # Trim out infs\n    assert Y.shape == tuple(X.shape)\n    return Y\n\n\n@given(gufunc_floats(\"(n)->(m)\", allow_nan=False))\ndef test_order_stats(args):\n    X, = args\n\n    o_stats = qt.order_stats(X)\n\n    assert len(o_stats) == len(X) + 2\n\n    # test is sorted\n    assert not np.any(np.diff(o_stats) < 0)\n\n    # limit elements\n    assert o_stats[0] == -np.inf\n    assert o_stats[-1] == np.inf\n\n    # equal to equiv versions with lists\n    assert 
[-np.inf] + sorted(X) + [np.inf] == list(o_stats)\n\n\n@given(gufunc_floats(\"(n)->()\", allow_nan=False))\ndef test_quantile(args):\n    X, = args\n\n    ll = qt.quantile(X, np.nextafter(0, 1))\n    assert ll == -np.inf if len(X) == 0 else ll == np.min(X)\n\n    uu = qt.quantile(X, np.nextafter(1, 0))\n    assert uu == -np.inf if len(X) == 0 else uu == np.max(X)\n\n    if len(X) % 2 == 1:\n        mm = qt.quantile(X, 0.5)\n        assert mm == np.median(X)\n\n\n@given(gufunc(\"(n),()->()\", dtype=np.float_, elements=[floats_, probs]))\ndef test_quantile_to_np(args):\n    X, q = args\n\n    estimate = qt.quantile(X, q)\n\n    # Correct the off-by-1 error in numpy percentile. This might still have\n    # issues due to round off error by multiplying by 100 since powers of 10\n    # are not very fp friendly.\n    estimate_np = np.percentile(np.concatenate(([-np.inf], X)), 100 * q, interpolation=\"higher\")\n\n    assert estimate == estimate_np\n\n\n@given(gufunc(\"(n),(),()->(),()\", dtype=np.float_, elements=[floats_, probs, probs]))\ndef test_quantile_CI(args):\n    X, q, alpha = args\n\n    idx_q = qt._quantile(len(X), q)\n    idx_l, idx_u = qt._quantile_CI(len(X), q, alpha)\n    assert idx_l <= idx_q\n    assert idx_q <= idx_u\n\n    # Lots of checks already inside quantile_CI\n    LB, UB = qt.quantile_CI(X, q, alpha)\n    assert LB <= UB\n\n    estimate = qt.quantile(X, q)\n    assert LB <= estimate\n    assert estimate <= UB\n\n\n@given(gufunc(\"(n),(),()->(),()\", dtype=np.float_, elements=[floats_, probs, probs]))\ndef test_quantile_CI_monotone_x(args):\n    X, q, alpha = args\n\n    assume(len(X) >= 1)\n\n    LB1, UB1 = qt.quantile_CI(X, q, alpha)\n\n    X2 = np.copy(X)\n    X2[0] = -np.inf\n    LB2, UB2 = qt.quantile_CI(X2, q, alpha)\n    assert LB1 >= LB2\n    assert UB1 >= UB2\n\n    X2 = np.copy(X)\n    X2[0] = np.inf\n    LB2, UB2 = qt.quantile_CI(X2, q, alpha)\n    assert LB1 <= LB2\n    assert UB1 <= UB2\n\n\n@given(gufunc(\"(n),(2),()->(),()\", 
dtype=np.float_, elements=[floats_, probs, probs]))\ndef test_quantile_CI_monotone_q(args):\n    X, q, alpha = args\n\n    q1, q2 = sorted(q)\n\n    # Lots of checks already inside quantile_CI\n    LB1, UB1 = qt.quantile_CI(X, q1, alpha)\n    LB2, UB2 = qt.quantile_CI(X, q2, alpha)\n    assert LB1 <= LB2\n    assert UB1 <= UB2\n\n\n@given(gufunc(\"(n),(),(2)->(),()\", dtype=np.float_, elements=[floats_, probs, probs]))\ndef test_quantile_CI_monotone_alpha(args):\n    X, q, alpha = args\n\n    alpha1, alpha2 = sorted(alpha)\n\n    # Lots of checks already inside quantile_CI\n    LB1, UB1 = qt.quantile_CI(X, q, alpha1)  # This CI should be larger\n    LB2, UB2 = qt.quantile_CI(X, q, alpha2)\n    assert LB1 <= LB2\n    assert UB1 >= UB2\n\n\n@given(\n    gufunc(\n        \"(n),(),(),()->()\", dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs]\n    )\n)\ndef test_max_quantile_CI(args):\n    X, q, m, alpha = args\n\n    estimate0, LB0, UB0 = qt.max_quantile_CI(X, q, m, alpha)\n    assert LB0 <= estimate0\n    assert estimate0 <= UB0\n\n    # Recompute without using _ internal funcs\n    q = q ** (1.0 / m)\n    LB, UB = qt.quantile_CI(X, q, alpha=alpha)\n    estimate = qt.quantile(X, q)\n\n    assert estimate0 == estimate\n    assert LB0 == LB\n    assert UB0 == UB\n\n\n@given(\n    gufunc(\n        \"(n),(),(),()->()\", dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs]\n    )\n)\ndef test_min_quantile_CI(args):\n    X, q, m, alpha = args\n\n    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)\n    assert LB0 <= estimate0\n    assert estimate0 <= UB0\n\n    # Recompute without using _ internal funcs\n    q = 1.0 - (1.0 - q) ** (1.0 / m)\n    LB, UB = qt.quantile_CI(X, q, alpha=alpha)\n    estimate = qt.quantile(X, q)\n\n    assert estimate0 == estimate\n    assert LB0 == LB\n    assert UB0 == UB\n\n\n@given(\n    gufunc(\n        \"(n),(),(),()->()\", dtype=[np.float_, np.float_, 
np.int_, np.float_], elements=[floats_, probs, counts, probs]\n    )\n)\ndef test_min_quantile_CI_to_max(args):\n    X, q, m, alpha = args\n\n    epsilon = 1e-8  # Small allowance for numerics\n\n    estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha)\n\n    # Try just above and below to allow for numerics error in case we are\n    # just on the boundary.\n    estimate1, LB1, UB1 = qt.max_quantile_CI(-X, (1.0 - q) - epsilon, m, alpha)\n    estimate2, LB2, UB2 = qt.max_quantile_CI(-X, 1.0 - q, m, alpha)\n    estimate3, LB3, UB3 = qt.max_quantile_CI(-X, (1.0 - q) + epsilon, m, alpha)\n\n    if len(X) == 0:\n        assert estimate0 == -np.inf  # quantile spec rounds down if n=0\n    else:\n        assert -estimate0 in (estimate1, estimate2, estimate3)\n\n    assert -LB0 in (UB1, UB2, UB3)\n    assert -UB0 in (LB1, LB2, LB3)\n\n\n@given(gufunc(\"(n),(),()->()\", dtype=np.float_, elements=[floats_, probs, probs]))\ndef test_quantile_and_CI(args):\n    X, q, alpha = args\n\n    estimate0, LB0, UB0 = qt.quantile_and_CI(X, q, alpha)\n    assert LB0 <= estimate0\n    assert estimate0 <= UB0\n\n    # Recompute without using _ internal funcs\n    LB, UB = qt.quantile_CI(X, q, alpha=alpha)\n    estimate = qt.quantile(X, q)\n\n    assert estimate0 == estimate\n    assert LB0 == LB\n    assert UB0 == UB\n\n\ndef test_order_stats_broadcast():\n    broadcast_tester(order_stats_trim, \"(n)->(n)\", otype=\"float64\", dtype=np.float_, elements=floats_)\n\n\ndef test_quantile_broadcast_0():\n    broadcast_tester(\n        qt.quantile, \"(n),()->()\", otype=\"float64\", excluded=(0,), dtype=np.float_, elements=[floats_, probs]\n    )\n\n\ndef test_quantile_broadcast_1():\n    broadcast_tester(\n        qt.quantile, \"(n),()->()\", otype=\"float64\", excluded=(1,), dtype=np.float_, elements=[floats_, probs]\n    )\n\n\ndef test_quantile_CI_broadcast_0():\n    multi_broadcast_tester(\n        qt.quantile_CI,\n        \"(n),(),()->(),()\",\n        otypes=[\"float64\", 
\"float64\"],\n        excluded=(0,),\n        dtype=np.float_,\n        elements=[floats_, probs, probs],\n    )\n\n\ndef test_quantile_CI_broadcast_1():\n    multi_broadcast_tester(\n        qt.quantile_CI,\n        \"(n),(),()->(),()\",\n        excluded=(1, 2),\n        otypes=[\"float64\", \"float64\"],\n        dtype=np.float_,\n        elements=[floats_, probs, probs],\n    )\n\n\ndef test_max_quantile_CI_broadcast_0():\n    multi_broadcast_tester(\n        qt.max_quantile_CI,\n        \"(n),(),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(0,),\n        dtype=[np.float_, np.float_, np.int_, np.float_],\n        elements=[floats_, probs, counts, probs],\n    )\n\n\ndef test_max_quantile_CI_broadcast_1():\n    multi_broadcast_tester(\n        qt.max_quantile_CI,\n        \"(n),(),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(1, 2, 3),\n        dtype=[np.float_, np.float_, np.int_, np.float_],\n        elements=[floats_, probs, counts, probs],\n    )\n\n\ndef test_min_quantile_CI_broadcast_0():\n    multi_broadcast_tester(\n        qt.min_quantile_CI,\n        \"(n),(),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(0,),\n        dtype=[np.float_, np.float_, np.int_, np.float_],\n        elements=[floats_, probs, counts, probs],\n    )\n\n\ndef test_min_quantile_CI_broadcast_1():\n    multi_broadcast_tester(\n        qt.min_quantile_CI,\n        \"(n),(),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(1, 2, 3),\n        dtype=[np.float_, np.float_, np.int_, np.float_],\n        elements=[floats_, probs, counts, probs],\n    )\n\n\ndef test_quantile_and_CI_broadcast_0():\n    multi_broadcast_tester(\n        qt.quantile_and_CI,\n        \"(n),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(0,),\n        dtype=np.float_,\n       
 elements=[floats_, probs, probs],\n    )\n\n\ndef test_quantile_and_CI_broadcast_1():\n    multi_broadcast_tester(\n        qt.quantile_and_CI,\n        \"(n),(),()->(),(),()\",\n        otypes=[\"float64\", \"float64\", \"float64\"],\n        excluded=(1, 2),\n        dtype=np.float_,\n        elements=[floats_, probs, probs],\n    )\n\n\ndef mc_test_quantile_CI(mc_runs=1000, n=2000, q=0.5, alpha=0.05, random=np.random):\n    q0 = ss.norm.ppf(q)\n\n    X = random.randn(mc_runs, n)\n    R = np.array([qt.quantile_CI(xx, q) for xx in X])\n    LB, UB = R[:, 0], R[:, 1]\n\n    n_pass = np.sum((LB <= q0) & (q0 <= UB))\n    # This is only a one-sided test\n    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)\n    return pval\n\n\ndef mc_test_max_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random):\n    qq_level = q ** (1.0 / m)\n    q0 = ss.norm.ppf(qq_level)\n\n    X = random.randn(mc_runs, n)\n    R = np.array([qt.max_quantile_CI(xx, q, m, alpha) for xx in X])\n    LB, UB = R[:, 1], R[:, 2]\n\n    n_pass = np.sum((LB <= q0) & (q0 <= UB))\n    # This is only a one-sided test\n    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)\n    return pval\n\n\ndef mc_test_min_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random):\n    qq_level = 1.0 - (1.0 - q) ** (1.0 / m)\n    q0 = ss.norm.ppf(qq_level)\n\n    X = random.randn(mc_runs, n)\n    R = np.array([qt.min_quantile_CI(xx, q, m, alpha) for xx in X])\n    LB, UB = R[:, 1], R[:, 2]\n\n    n_pass = np.sum((LB <= q0) & (q0 <= UB))\n    # This is only a one-sided test\n    pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha)\n    return pval\n\n\ndef test_all_mc():\n    random = np.random.RandomState(8623)\n\n    pvals = []\n    pvals.append(mc_test_quantile_CI(q=0.3, random=random))\n    pvals.append(mc_test_quantile_CI(q=0.5, random=random))\n    pvals.append(mc_test_quantile_CI(q=0.99, random=random))\n    pvals.append(mc_test_max_quantile_CI(q=0.3, random=random))\n    
pvals.append(mc_test_max_quantile_CI(q=0.5, random=random))\n    pvals.append(mc_test_max_quantile_CI(q=0.99, random=random))\n    pvals.append(mc_test_min_quantile_CI(q=0.3, random=random))\n    pvals.append(mc_test_min_quantile_CI(q=0.5, random=random))\n    pvals.append(mc_test_min_quantile_CI(q=0.99, random=random))\n\n    SIDAK_FPR = 1.0 - (1.0 - GLOBAL_FPR) ** (1.0 / len(pvals))\n    assert np.min(pvals) >= SIDAK_FPR\n\n    return pvals\n"
  },
  {
    "path": "test/random_search_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hypothesis import given, settings\nfrom hypothesis.strategies import integers\n\nimport bayesmark.space as sp\nfrom bayesmark.np_util import linear_rescale\nfrom bayesmark.random_search import suggest_dict\nfrom hypothesis_util import close_enough, gufunc_floats, seeds\nfrom util import space_configs\n\n\n@given(space_configs(allow_missing=True), integers(min_value=1, max_value=8), seeds())\n@settings(deadline=None)\ndef test_random_search_suggest_sanity(api_args, n_suggest, seed):\n    meta, X, y, _ = api_args\n\n    # Get the unwarped X\n    S = sp.JointSpace(meta)\n    lower, upper = S.get_bounds().T\n    S.validate(X)\n\n    N = len(X)\n    # Split history and call twice with diff histories but same seed\n    M = N // 2\n    X1, X2 = X[:M], X[M:]\n    y1, y2 = y[:M], y[M:]\n\n    x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed))\n    x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed))\n\n    # Check types too\n    assert len(x_guess) == n_suggest\n    assert all(all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn]) for nn in range(len(x_guess)))\n    assert np.all(x_guess == x_guess2)\n    # Make sure validated\n    S.validate(x_guess)\n    S.validate(x_guess2)\n\n    # Test sanity of output\n    D, = lower.shape\n    x_guess_w = 
S.warp(x_guess)\n    assert type(x_guess_w) == np.ndarray\n    assert x_guess_w.dtype.kind == \"f\"\n    assert x_guess_w.shape == (n_suggest, D)\n    assert x_guess_w.shape == (n_suggest, D)\n    assert np.all(x_guess_w <= upper)\n\n\n@given(\n    gufunc_floats(\"(n,D),(n)->()\", min_value=0.0, max_value=1.0, min_side={\"D\": 1}),\n    integers(min_value=1, max_value=10),\n    seeds(),\n)\n@settings(deadline=None)\ndef test_random_search_suggest_diff(api_args, n_suggest, seed):\n    # Hard to know how many iters needed for arbitrary space that we need to\n    # run so that we don't get dupes by chance. So, for now, let's just stick\n    # with this simple space.\n    dim = {\"space\": \"linear\", \"type\": \"real\", \"range\": [1.0, 5.0]}\n\n    # Use at least 10 n_suggest to make sure don't get same answer by chance\n    X_w, y = api_args\n\n    D = X_w.shape[1]\n    param_names = [\"x%d\" % ii for ii in range(5)]\n    meta = dict(zip(param_names, [dim] * D))\n\n    # Get the unwarped X\n    S = sp.JointSpace(meta)\n    lower, upper = S.get_bounds().T\n    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)\n    X = S.unwarp(X_w)\n    S.validate(X)\n\n    seed = seed // 2  # Keep in bounds even after add 7\n\n    x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed))\n    # Use diff seed to intentionally get diff result\n    x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7))\n\n    # Check types too\n    assert len(x_guess) == n_suggest\n    assert len(x_guess2) == n_suggest\n    assert not np.all(x_guess == x_guess2)\n    # Make sure validated\n    S.validate(x_guess)\n    S.validate(x_guess2)\n\n    # Test sanity of output\n    D, = lower.shape\n\n    x_guess_w = S.warp(x_guess)\n    assert type(x_guess_w) == np.ndarray\n    assert x_guess_w.dtype.kind == \"f\"\n    assert x_guess_w.shape == (n_suggest, D)\n    assert x_guess_w.shape == (n_suggest, D)\n    assert np.all(x_guess_w <= 
upper)\n\n    x_guess_w = S.warp(x_guess2)\n    assert type(x_guess_w) == np.ndarray\n    assert x_guess_w.dtype.kind == \"f\"\n    assert x_guess_w.shape == (n_suggest, D)\n    assert x_guess_w.shape == (n_suggest, D)\n    assert np.all(x_guess_w <= upper)\n"
  },
  {
    "path": "test/serialize_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport os\nfrom string import ascii_letters, digits\n\nfrom hypothesis import given\nfrom hypothesis.strategies import lists, text, uuids\nfrom pathvalidate.argparse import validate_filename, validate_filepath\n\nfrom bayesmark.serialize import XRSerializer\n\n\ndef filepaths():\n    def valid(ss):\n        try:\n            validate_filepath(ss)\n        except Exception:\n            return False\n        return True\n\n    alphabet = ascii_letters + digits + \"_.-~\" + os.sep\n    S = text(alphabet=alphabet, min_size=1).map(lambda ss: os.sep + ss).filter(valid)\n    return S\n\n\ndef filenames(suffix=\"\"):\n    def valid(ss):\n        try:\n            validate_filename(ss)\n        except Exception:\n            return False\n        return True\n\n    alphabet = ascii_letters + digits + \"_.-~\"\n    S = text(alphabet=alphabet, min_size=1).map(lambda ss: ss + suffix).filter(valid)\n    return S\n\n\n@given(filepaths(), lists(filenames()), filenames())\ndef test_init_db_manual(db_root, keys, db):\n    XRSerializer.init_db_manual(db_root, keys, db)\n\n\n@given(uuids())\ndef test_uuid_to_fname(uu):\n    ff = XRSerializer._uuid_to_fname(uu)\n    uu_ = XRSerializer._fname_to_uuid(ff)\n    assert uu == uu_\n\n    ff_ = XRSerializer._uuid_to_fname(uu_)\n    assert ff == ff_\n\n\n@given(filenames())\ndef test_key_to_fname(key):\n    ff = 
XRSerializer._key_to_fname(key)\n    kk = XRSerializer._fname_to_key(ff)\n    assert key == kk\n\n\n@given(filepaths(), lists(filenames()), filenames())\ndef test_validate(db_root, keys, db):\n    XRSerializer._validate(db_root, keys, db)\n"
  },
  {
    "path": "test/signatures_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport json\nimport random as pyrandom\nimport warnings\n\nimport numpy as np\nfrom hypothesis import given\nfrom hypothesis.strategies import dictionaries, floats, lists, text, tuples\n\nimport bayesmark.signatures as ss\nfrom bayesmark.experiment import OBJECTIVE_NAMES\nfrom util import space_configs\n\nN_SIG = ss.N_SUGGESTIONS\n\n\ndef bsigs():\n    S = lists(floats(allow_infinity=False, allow_nan=False), min_size=N_SIG, max_size=N_SIG)\n    return S\n\n\ndef sigs():\n    S = lists(bsigs(), min_size=1)\n    return S\n\n\ndef sig_pair():\n    def separate(D):\n        signatures, signatures_ref = {}, {}\n        for kk in D:\n            if len(D[kk]) == 1:\n                v_ref, = D[kk]\n                signatures_ref[kk] = np.asarray(v_ref)\n            elif len(D[kk]) == 2:\n                v, v_ref = D[kk]\n                signatures[kk] = np.asarray(v)\n                signatures_ref[kk] = np.asarray(v_ref)\n            else:\n                assert False\n        return signatures, signatures_ref\n\n    sig_dict = dictionaries(text(), tuples(bsigs()) | tuples(bsigs(), bsigs()))\n    S = sig_dict.map(separate)\n    return S\n\n\ndef some_mock_f(x):\n    \"\"\"Some arbitrary deterministic test function.\n    \"\"\"\n    random_stream = pyrandom.Random(json.dumps(x, sort_keys=True))\n    y = [random_stream.gauss(0, 1) for _ in OBJECTIVE_NAMES]\n   
 return y\n\n\n@given(space_configs())\ndef test_get_func_signature(api_config):\n    api_config, _, _, _ = api_config\n\n    signature_x, signature_y = ss.get_func_signature(some_mock_f, api_config)\n\n\n@given(dictionaries(text(), sigs()))\ndef test_analyze_signatures(signatures):\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=RuntimeWarning)\n        sig_errs, signatures_median = ss.analyze_signatures(signatures)\n\n\n@given(sig_pair())\ndef test_analyze_signature_pair(args):\n    signatures, signatures_ref = args\n    with warnings.catch_warnings():\n        warnings.filterwarnings(\"ignore\", category=RuntimeWarning)\n        sig_errs, signatures_pair = ss.analyze_signature_pair(signatures, signatures_ref)\n"
  },
  {
    "path": "test/sklearn_funcs_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport pickle as pkl\n\nimport numpy as np\nfrom hypothesis import assume, given, settings\nfrom hypothesis.strategies import sampled_from, text\nfrom sklearn.linear_model import LinearRegression\n\nfrom bayesmark import data\nfrom bayesmark import sklearn_funcs as skf\nfrom bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES\nfrom bayesmark.random_search import suggest_dict\nfrom bayesmark.space import JointSpace\nfrom hypothesis_util import seeds\n\n\n@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())\n@settings(deadline=None)\ndef test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(metric in data.METRICS_LOOKUP[prob_type])\n\n    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed)\n\n    api_config = test_prob.get_api_config()\n    x_guess, = suggest_dict([], [], api_config, n_suggestions=1, random=np.random.RandomState(rs_seed))\n\n    loss = test_prob.evaluate(x_guess)\n\n    assert isinstance(loss, tuple)\n    assert all(isinstance(xx, float) for xx in loss)\n    assert np.shape(loss) == np.shape(test_prob.objective_names)\n\n\n@given(text(), text(), text())\ndef test_inverse_test_case_str(model, dataset, scorer):\n    assume(ARG_DELIM not in (model + dataset + 
scorer))\n\n    test_case = skf.SklearnModel.test_case_str(model, dataset, scorer)\n    R = skf.SklearnModel.inverse_test_case_str(test_case)\n\n    assert R == (model, dataset, scorer)\n\n\n@given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds())\n@settings(deadline=None)\ndef test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed):\n    prob_type = data.get_problem_type(dataset)\n    assume(metric in data.METRICS_LOOKUP[prob_type])\n\n    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=0)\n    api_config = test_prob.get_api_config()\n    space = JointSpace(api_config)\n\n    n_obj = len(test_prob.objective_names)\n\n    n_suggestions = 20\n\n    x_guess = suggest_dict([], [], api_config, n_suggestions=n_suggestions, random=np.random.RandomState(rs_seed))\n    x_guess_w = space.warp(x_guess)\n\n    random = np.random.RandomState(model_seed)\n    y = random.randn(n_suggestions, n_obj)\n\n    reg = LinearRegression()\n    reg.fit(x_guess_w, y)\n    loss0 = reg.predict(x_guess_w)\n\n    path = pkl.dumps(reg)\n    del reg\n    assert isinstance(path, bytes)\n\n    test_prob_surr = skf.SklearnSurrogate(model, dataset, metric, path)\n    loss = test_prob_surr.evaluate(x_guess[0])\n\n    assert isinstance(loss, tuple)\n    assert all(isinstance(xx, float) for xx in loss)\n    assert np.shape(loss) == np.shape(test_prob.objective_names)\n\n    assert np.allclose(loss0[0], np.array(loss))\n"
  },
  {
    "path": "test/space_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hypothesis import assume, given\nfrom hypothesis.extra.numpy import from_dtype\nfrom hypothesis.strategies import booleans, floats, integers, just, lists, sampled_from\nfrom hypothesis_gufunc.gufunc import gufunc_args as gufunc\nfrom scipy.interpolate import interp1d\nfrom sklearn.preprocessing import LabelBinarizer\n\nimport bayesmark.space as sp\nfrom bayesmark.np_util import linear_rescale\nfrom bayesmark.space import CAT_DTYPE, CAT_KIND, CAT_NATIVE_DTYPE\nfrom hypothesis_util import broadcast_tester, close_enough, gufunc_floats\nfrom util import space_configs\n\nINT_MIN = np.iinfo(\"i\").min\nINT_MAX = np.iinfo(\"i\").max\n\nWARPS = (\"logit\", \"linear\", \"bilog\", \"log\")\nENCODER_DTYPES = (\"bool\", \"int\", \"float\")\n\n\ndef encoder_gen(args):\n    X, labels, assume_sorted, dtype, assume_valid = args\n\n    if assume_sorted:\n        labels = np.sort(labels)\n    X = labels[X % len(labels)]\n    dtype = dtype.item()  # np.array does not like np.array(dtype)\n    return X, labels, assume_sorted, dtype, assume_valid\n\n\ndef decoder_gen(args):\n    Y, labels, assume_sorted, dtype, assume_valid = args\n\n    if assume_sorted:\n        labels = np.sort(labels)\n    dtype = dtype.item()\n    return Y, labels, assume_sorted, dtype, assume_valid\n\n\ndef decoder_gen_broadcast(args):\n    Y, labels, assume_sorted = args\n\n  
  if assume_sorted:\n        labels = np.sort(labels)\n    return Y, labels, assume_sorted\n\n\n@given(\n    gufunc(\n        \"(),(n),(),(),()->(n)\",\n        dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_],\n        elements=[\n            integers(0, INT_MAX),\n            from_dtype(np.dtype(CAT_DTYPE)),\n            booleans(),\n            sampled_from(ENCODER_DTYPES),\n            booleans(),\n        ],\n        unique=[False, True, False, False, False],\n        min_side={\"n\": 1},\n    ).map(encoder_gen)\n)\ndef test_encode_decode(args):\n    X, labels, assume_sorted, dtype, assume_valid = args\n\n    Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)\n    if assume_sorted:  # otherwise labels will be re-arranged\n        (idx,), = np.where(Y > 0)\n        assert np.asarray(labels[idx]) == X\n    assert Y.dtype == dtype\n\n    X2 = sp.decode(Y, labels, assume_sorted=assume_sorted)\n\n    assert close_enough(X, X2)\n\n\n@given(\n    gufunc(\n        \"(m),(n),(),(),()->(n)\",\n        dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_],\n        elements=[\n            integers(0, INT_MAX),\n            from_dtype(np.dtype(CAT_DTYPE)),\n            booleans(),\n            sampled_from(ENCODER_DTYPES),\n            booleans(),\n        ],\n        unique=[False, True, False, False, False],\n        min_side={\"m\": 1, \"n\": 3},\n    ).map(encoder_gen)\n)\ndef test_encoder_to_sklearn(args):\n    # sklearn cannot handle this correctly unless n >= 3\n    X, labels, assume_sorted, dtype, assume_valid = args\n\n    Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)\n\n    enc = LabelBinarizer()\n    enc.fit(labels)\n    Y2 = enc.transform(X)\n\n    assert close_enough(Y, Y2.astype(dtype))\n\n\n@given(\n    gufunc(\n        \"(m,n),(n),(),(),()->(n)\",\n        dtype=[np.float_, CAT_DTYPE, np.bool_, str, np.bool_],\n        elements=[floats(), 
from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()],\n        unique=[False, True, False, False, False],\n        min_side={\"n\": 1},\n    ).map(decoder_gen)\n)\ndef test_decode_encode(args):\n    Y, labels, assume_sorted, dtype, assume_valid = args\n\n    assert Y.ndim >= 1 and Y.shape[-1] == len(labels)\n\n    X = sp.decode(Y, labels, assume_sorted=assume_sorted)\n    Y2 = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid)\n\n    # The encoding is defined as the argmax\n    assert np.all(Y.argmax(axis=1) == Y2.argmax(axis=1))\n    assert np.all(np.sum(Y2 != 0, axis=1) == 1)\n    assert np.all(np.sum(Y2 == 1, axis=1) == 1)\n\n\n@given(\n    gufunc(\n        \"(m,n),(n),(),(),()->(n)\",\n        dtype=[np.float_, CAT_DTYPE, np.bool_, str, np.bool_],\n        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()],\n        unique=[False, True, False, False, False],\n        min_side={\"m\": 1, \"n\": 3},\n    ).map(decoder_gen)\n)\ndef test_decode_to_sklearn(args):\n    Y, labels, assume_sorted, dtype, assume_valid = args\n\n    assert Y.ndim >= 1 and Y.shape[-1] == len(labels)\n\n    X = sp.decode(Y, labels, assume_sorted=assume_sorted)\n\n    enc = LabelBinarizer()\n    enc.fit(labels)\n    X2 = enc.inverse_transform(Y)\n\n    assert X.dtype.kind == CAT_KIND\n    assert close_enough(X, X2.astype(X.dtype))\n\n\ndef test_encode_broadcast_bool():\n    broadcast_tester(\n        sp.encode,\n        \"(),(n),(),(),()->(n)\",\n        otype=bool,\n        excluded=(1, 2, 3, 4),\n        dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_],\n        elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just(\"bool\"), booleans()],\n        unique=[False, True, False, False, False],\n        min_side={\"n\": 1},\n        map_=encoder_gen,\n    )\n\n\ndef test_encode_broadcast_int():\n    broadcast_tester(\n        
sp.encode,\n        \"(),(n),(),(),()->(n)\",\n        otype=int,\n        excluded=(1, 2, 3, 4),\n        dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_],\n        elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just(\"int\"), booleans()],\n        unique=[False, True, False, False, False],\n        min_side={\"n\": 1},\n        map_=encoder_gen,\n    )\n\n\ndef test_encode_broadcast_float():\n    broadcast_tester(\n        sp.encode,\n        \"(),(n),(),(),()->(n)\",\n        otype=float,\n        excluded=(1, 2, 3, 4),\n        dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_],\n        elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just(\"float\"), booleans()],\n        unique=[False, True, False, False, False],\n        min_side={\"n\": 1},\n        map_=encoder_gen,\n    )\n\n\ndef test_decode_broadcast_bool():\n    broadcast_tester(\n        sp.decode,\n        \"(m,n),(n),()->(m)\",\n        otype=CAT_DTYPE,\n        excluded=(1, 2),\n        dtype=[np.bool_, CAT_DTYPE, np.bool_],\n        elements=[booleans(), from_dtype(np.dtype(CAT_DTYPE)), booleans()],\n        unique=[False, True, False],\n        min_side={\"n\": 1},\n        map_=decoder_gen_broadcast,\n    )\n\n\ndef test_decode_broadcast_int():\n    broadcast_tester(\n        sp.decode,\n        \"(m,n),(n),()->(m)\",\n        otype=CAT_DTYPE,\n        excluded=(1, 2),\n        dtype=[np.int_, CAT_DTYPE, np.bool_],\n        elements=[integers(INT_MIN, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans()],\n        unique=[False, True, False],\n        min_side={\"n\": 1},\n        map_=decoder_gen_broadcast,\n    )\n\n\ndef test_decode_broadcast_float():\n    broadcast_tester(\n        sp.decode,\n        \"(m,n),(n),()->(m)\",\n        otype=CAT_DTYPE,\n        excluded=(1, 2),\n        dtype=[np.float_, CAT_DTYPE, np.bool_],\n        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans()],\n        unique=[False, 
True, False],\n        min_side={\"n\": 1},\n        map_=decoder_gen_broadcast,\n    )\n\n\n@given(gufunc(\"()->()\", dtype=np.float_, elements=floats()))\ndef test_bilog_props(args):\n    x, = args\n\n    y = sp.bilog(x)\n\n    assert sp.bilog(0) == 0  # This could be its own test\n    assert close_enough(y, -sp.bilog(-x), equal_nan=True)\n    assert np.isfinite(y) == np.isfinite(x)\n\n\n@given(gufunc_floats(\"(2)->(2)\", allow_infinity=False, allow_nan=False))\ndef test_bilog_monotonic(args):\n    x, = args\n\n    x1, x2 = sorted(np.abs(x))\n\n    assert sp.bilog(x1) < sp.bilog((1 + 1e-6) * x2 + 1e-6)\n\n\n@given(gufunc(\"()->()\", dtype=np.float_, elements=floats()))\ndef test_bilog_biexp(args):\n    x, = args\n\n    assert close_enough(sp.biexp(sp.bilog(x)), x, equal_nan=True)\n\n\ndef test_bilog_broadcast():\n    broadcast_tester(sp.bilog, \"()->()\", otype=float)\n\n\ndef test_biexp_broadcast():\n    broadcast_tester(sp.biexp, \"()->()\", otype=float, min_value=-10, max_value=10)\n\n\n@given(sampled_from(WARPS), gufunc_floats(\"(n),(m)->(n)\", allow_infinity=False, allow_nan=False))\ndef test_real_values_warp_unwarp(warp, args):\n    x, values = args\n\n    if warp == \"log\":\n        values = values[values > 0]\n    if warp == \"logit\":\n        values = values[(0 < values) & (values < 1)]\n\n    # We could eliminate need for this if we split out test for log and logit\n    # cases and specify unique flag, but works as is\n    v = np.unique(values)\n    assume(len(v) >= 2)\n\n    f = interp1d(v, v, kind=\"nearest\", fill_value=\"extrapolate\")\n    x = f(x)\n    assert x.ndim == 1  # make sure interp1d did not mess it up\n\n    S = sp.Real(warp=warp, values=values)\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = 
S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n@given(sampled_from(WARPS), gufunc_floats(\"(n),(2)->(n)\", allow_infinity=False, allow_nan=False))\ndef test_real_range_warp_unwarp(warp, args):\n    x, range_ = args\n\n    if warp == \"log\":\n        range_ = range_[range_ > 0]\n    if warp == \"logit\":\n        range_ = range_[(0 < range_) & (range_ < 1)]\n\n    range_ = np.sort(range_)\n    assume(len(range_) == 2 and range_[0] < range_[1])\n\n    x = np.clip(x, range_[0], range_[1])\n\n    S = sp.Real(warp=warp, range_=range_)\n\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n# Note to really stress test this we should elim min and max val, but that\n# requires that we split out a diff test func for log and logit\n@given(sampled_from(WARPS), gufunc_floats(\"(n,1),(2)->(n)\", min_value=-1000, max_value=1000))\ndef test_real_range_unwarp_warp(warp, args):\n    x_w, range_ = args\n\n    if warp == \"log\":\n        range_ = range_[range_ > 0]\n    if warp == \"logit\":\n        range_ = range_[(0 < range_) & (range_ < 1)]\n\n    range_ = np.sort(range_)\n    assume(len(range_) == 2 and range_[0] < range_[1])\n\n    range_warped = sp.WARP_DICT[warp](range_)\n\n    x_w = np.clip(x_w, range_warped[0], range_warped[1])\n\n    S = sp.Real(warp=warp, range_=range_)\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper)\n\n    x = S.unwarp(x_w)\n    assert x_w.shape == x.shape + (1,)\n    assert x.dtype == range_.dtype\n    
assert x.dtype == S.dtype\n\n    x2 = S.validate(x)\n    assert close_enough(x, x2)\n\n    x_w2 = S.warp(x)\n    assert x_w2.shape == x_w.shape\n    x_w3 = S.validate_warped(x_w2)\n    assert close_enough(x_w2, x_w3)\n\n    assert close_enough(x_w, x_w2)\n\n\n@given(\n    sampled_from((\"linear\", \"bilog\")),\n    gufunc(\"(n),(m)->(n)\", dtype=np.int_, elements=integers(INT_MIN, INT_MAX), unique=[False, True], min_side={\"m\": 2}),\n)\ndef test_int_values_warp_unwarp(warp, args):\n    x, values = args\n\n    v = np.unique(values)  # Also sort\n    assert len(v) >= 2\n    f = interp1d(v, v, kind=\"nearest\", fill_value=\"extrapolate\")\n    x = f(x).astype(values.dtype)\n    assert x.ndim == 1  # make sure interp1d did not mess it up\n\n    S = sp.Integer(warp=warp, values=values)\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n@given(gufunc(\"(n),(m)->(n)\", dtype=np.int_, elements=integers(1, INT_MAX), unique=[False, True], min_side={\"m\": 2}))\ndef test_log_int_values_warp_unwarp(args):\n    x, values = args\n\n    warp = \"log\"\n\n    v = np.unique(values)  # Also sort\n    assert len(v) >= 2\n    f = interp1d(v, v, kind=\"nearest\", fill_value=\"extrapolate\")\n    x = f(x).astype(values.dtype)\n    assert x.ndim == 1  # make sure interp1d did not mess it up\n\n    S = sp.Integer(warp=warp, values=values)\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, 
y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n@given(sampled_from((\"linear\", \"bilog\", \"log\")), gufunc(\"(n),(2)->(n)\", dtype=np.int_, elements=integers(-1000, 1000)))\ndef test_int_range_warp_unwarp(warp, args):\n    \"\"\"Warning: this explicitly ignores issues with min max if going to int\n    limit, since\n    >>> np.array(INT_MAX).astype(np.float32).astype(np.int32)\n    array(-2147483648, dtype=int32)\n    Without any warning from numpy.\n    \"\"\"\n    x, range_ = args\n\n    # We could split out log into diff function without this pruning if we\n    # start failing hypothesis health check.\n    if warp == \"log\":\n        range_ = range_[range_ > 0]\n\n    range_ = np.sort(range_)\n    assume(len(range_) == 2 and range_[0] < range_[1])\n\n    x = np.clip(x, range_[0], range_[1]).astype(range_.dtype)\n\n    S = sp.Integer(warp=warp, range_=range_)\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert x.dtype == x2.dtype\n    # Close enough when evaluated as floats\n    assert close_enough(x.astype(\"f\"), x2.astype(\"f\"))\n\n\n@given(gufunc(\"(n)->(n)\", dtype=np.bool_, elements=booleans()))\ndef test_bool_warp_unwarp(args):\n    x, = args\n\n    S = sp.Boolean()\n    y = S.warp(x)\n    assert y.shape == x.shape + (1,)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == 
x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n@given(\n    gufunc(\n        \"(n),(m)->(n)\",\n        dtype=[np.int_, CAT_DTYPE],\n        elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE))],\n        unique=[False, True],\n        min_side={\"m\": 2},\n    )\n)\ndef test_cat_warp_unwarp(args):\n    x, values = args\n\n    assert len(set(values)) >= 2\n\n    x = values[x % len(values)]\n    assert x.ndim == 1\n\n    S = sp.Categorical(values=values)\n    y = S.warp(x)\n    assert y.shape == x.shape + (len(values),)\n    assert y.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= y)\n    assert np.all(y <= upper)\n\n    y2 = S.validate_warped(y)\n    assert close_enough(y, y2)\n\n    x2 = S.unwarp(y)\n    assert x2.shape == x.shape\n    x3 = S.validate(x2)\n    assert close_enough(x2, x3)\n\n    assert close_enough(x, x2)\n\n\n@given(space_configs())\ndef test_joint_space_unwarp_warp(args):\n    meta, X, _, _ = args\n\n    S = sp.JointSpace(meta)\n    S.validate(X)\n\n    X_w2 = S.warp(X)\n    assert X_w2.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= X_w2)\n    assert np.all(X_w2 <= upper)\n\n    X2 = S.unwarp(X_w2)\n\n    assert all(all(close_enough(X[ii][vv], X2[ii][vv]) for vv in X[ii]) for ii in range(len(X)))\n    S.validate(X2)\n\n\n@given(space_configs())\ndef test_joint_space_warp_missing(args):\n    meta, X, _, fixed_vars = args\n\n    S = sp.JointSpace(meta)\n\n    X_w = S.warp([fixed_vars])\n    assert X_w.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all((lower <= X_w) | np.isnan(X_w))\n    assert np.all((X_w <= upper) | np.isnan(X_w))\n\n    for param, xx in zip(S.param_list, np.hsplit(X_w, S.blocks[:-1])):\n        xx, = xx\n        if param in fixed_vars:\n            x_orig = 
S.spaces[param].unwarp(xx).item()\n            S.spaces[param].validate(x_orig)\n            assert close_enough(x_orig, fixed_vars[param])\n\n            # check other direction\n            x_w2 = S.spaces[param].warp(fixed_vars[param])\n            assert close_enough(xx, x_w2)\n        else:\n            assert np.all(np.isnan(xx))\n\n\n@given(space_configs())\ndef test_joint_space_warp_fixed_vars(args):\n    meta, X, _, fixed_vars = args\n\n    # set X vals equal to fixed_vars\n    for xx in X:\n        for param in fixed_vars:\n            xx[param] = fixed_vars[param]\n\n    S = sp.JointSpace(meta)\n    lower, upper = S.get_bounds().T\n\n    X_w = S.warp(X)\n    assert X_w.dtype == sp.WARPED_DTYPE\n\n    # Test bounds\n    lower, upper = S.get_bounds().T\n    assert np.all(lower <= X_w)\n    assert np.all(X_w <= upper)\n\n    X2 = S.unwarp(X_w, fixed_vals=fixed_vars)\n\n    # Make sure we get == not just close in unwarp for fixed vars\n    for xx in X2:\n        for param in fixed_vars:\n            assert xx[param] == fixed_vars[param]\n\n\n@given(space_configs(), integers(min_value=0, max_value=10))\ndef test_joint_grid(args, max_interp):\n    meta, _, _, _ = args\n\n    type_whitelist = (bool, int, float, CAT_NATIVE_DTYPE)\n\n    S = sp.JointSpace(meta)\n\n    lower, upper = S.get_bounds().T\n\n    G = S.grid(max_interp=max_interp)\n    assert sorted(G.keys()) == sorted(meta.keys())\n\n    for var, grid in G.items():\n        curr_space = S.spaces[var]\n\n        # Make sure same as calling direct\n        grid2 = curr_space.grid(max_interp)\n        assert grid == grid2\n\n        if len(grid) == 0:\n            assert grid == []\n            assert max_interp == 0 if curr_space.values is None else len(curr_space.values) == 0\n            continue\n\n        # Make sure native type\n        assert all(type(xx) in type_whitelist for xx in grid)\n        tt = type(grid[0])\n        assert all(type(xx) == tt for xx in grid)\n\n        assert 
np.all(np.array(grid) == np.unique(grid))\n\n        if max_interp >= 2:\n            assert curr_space.lower is None or close_enough(curr_space.lower, grid[0])\n            assert curr_space.upper is None or close_enough(curr_space.upper, grid[-1])\n\n        if curr_space.values is not None:\n            assert np.all(curr_space.values == grid)\n        else:\n            assert len(grid) <= max_interp\n            # Could else, check approx linear in warped space, but good enough\n            # for now.\n\n\ndef test_unravel_index_empty():\n    assert sp.unravel_index(()) == ()\n\n\n@given(lists(integers(0, 2), min_size=1))\ndef test_unravel_index_empty_2(dims):\n    if np.prod(dims) > 0:\n        dims[0] = 0\n    dims = tuple(dims)\n\n    assert sp.unravel_index(dims) == ()\n"
  },
  {
    "path": "test/stats_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nimport scipy.stats as sst\nfrom hypothesis import assume, given\nfrom hypothesis.strategies import integers, lists, sampled_from\nfrom hypothesis_gufunc.gufunc import gufunc_args\nfrom sklearn.preprocessing import robust_scale\n\nfrom bayesmark import stats\nfrom hypothesis_util import close_enough, mfloats, probs, seeds\n\n\ndef t_test_(x):\n    \"\"\"Perform a standard t-test to test if the values in `x` are sampled from\n    a distribution with a zero mean.\n\n    Parameters\n    ----------\n    x : array-like, shape (n_samples,)\n        array of data points to test.\n\n    Returns\n    -------\n    pval : float\n        p-value (in [0,1]) from t-test on `x`.\n    \"\"\"\n    assert np.ndim(x) == 1 and (not np.any(np.isnan(x)))\n\n    if (len(x) <= 1) or (not np.all(np.isfinite(x))):\n        return 1.0  # Can't say anything about scale => p=1\n\n    _, pval = sst.ttest_1samp(x, 0.0)\n    if np.isnan(pval):\n        # Should only be possible if scale underflowed to zero:\n        assert np.var(x, ddof=1) <= 1e-100\n        # It is debatable if the condition should be ``np.mean(x) == 0.0`` or\n        # ``np.all(x == 0.0)``. 
Should not matter in practice.\n        pval = np.float(np.mean(x) == 0.0)\n    assert 0.0 <= pval and pval <= 1.0\n    return pval\n\n\n@given(gufunc_args(\"(n),()->(n)\", dtype=np.float_, elements=[mfloats(), probs()], min_side=2))\ndef test_robust_standardize_to_sklearn(args):\n    X, q_level = args\n\n    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)\n    assert close_enough(q1 - q0, q_level)\n\n    X_bo = stats.robust_standardize(X, q_level=q_level)\n\n    X = X[:, None]\n    X_skl = robust_scale(X, axis=0, with_centering=True, with_scaling=True, quantile_range=[100.0 * q0, 100.0 * q1])\n    X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0))\n\n    assert close_enough(X_bo, X_skl, equal_nan=True)\n\n\ndef test_robust_standardize_broadcast():\n    \"\"\"Need to do things different here since standardize broadcasts over the\n    wrong dimension (0 instead of -1).\n    \"\"\"\n    # Build vectorize version, this is just loop inside.\n    f_vec = np.vectorize(stats.robust_standardize, signature=\"(n),()->(n)\", otypes=[\"float64\"])\n\n    @given(gufunc_args(\"(n,m),()->(n,m)\", dtype=np.float_, min_side={\"n\": 2}, elements=[mfloats(), probs()]))\n    def test_f(args):\n        X, q_level = args\n\n        R1 = stats.robust_standardize(X, q_level)\n        R2 = f_vec(X.T, q_level).T\n        assert R1.dtype == \"float64\"\n        assert R2.dtype == \"float64\"\n        assert close_enough(R1, R2, equal_nan=True)\n\n    # Call the test\n    test_f()\n\n\n@given(integers(0, 10), mfloats(), probs())\ndef test_t_EB_zero_var(N, val, alpha):\n    x = val + np.zeros(N)\n    EB = stats.t_EB(x, alpha=alpha)\n    if N <= 1:\n        assert EB == np.inf\n    else:\n        assert np.allclose(EB, 0.0)\n\n\n@given(integers(1, 10), sampled_from([np.inf, -np.inf]), probs())\ndef test_t_EB_inf(N, val, alpha):\n    x = np.zeros(N)\n    x[0] = val\n\n    EB = stats.t_EB(x, alpha=alpha)\n    if N <= 1:\n        assert EB == np.inf\n    else:\n        assert 
np.isnan(EB)\n\n\n@given(seeds(), probs(), integers(2, 10))\ndef test_t_EB_coverage(seed, alpha, N):\n    trials = 100\n\n    random_st = np.random.RandomState(seed)\n\n    fail = 0\n    for tt in range(trials):\n        x = random_st.randn(N)\n\n        EB = stats.t_EB(x, alpha=alpha)\n        mu = np.nanmean(x)\n        LB, UB = mu - EB, mu + EB\n        assert np.isfinite(LB) and np.isfinite(UB)\n        fail += (0.0 < LB) or (UB < 0.0)\n    pval = sst.binom_test(fail, trials, alpha)\n\n    assert pval >= 0.05 / 100  # Assume we run 100 times\n\n\n@given(lists(mfloats(), min_size=2))\ndef test_t_test_to_EB(x):\n    pval = t_test_(x)\n    assume(0.0 < pval and pval < 1.0)\n\n    EB = stats.t_EB(x, alpha=pval)\n    assert np.allclose(np.abs(np.mean(x)), EB)\n"
  },
  {
    "path": "test/util.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport numpy as np\nfrom hypothesis import assume\nfrom hypothesis.extra.numpy import arrays\nfrom hypothesis.strategies import (\n    binary,\n    booleans,\n    composite,\n    dictionaries,\n    floats,\n    from_regex,\n    frozensets,\n    integers,\n    lists,\n    sampled_from,\n    text,\n)\nfrom hypothesis_gufunc.extra.xr import fixed_dataarrays, simple_coords, xr_coords\n\nimport bayesmark.space as sp\nfrom bayesmark.constants import ARG_DELIM, ITER, METHOD, RANDOM_SEARCH, SUGGEST, TEST_CASE, TRIAL\nfrom bayesmark.np_util import linear_rescale\n\nNULL_PLUG = \"\\x00\"\n\n\ndef _easy_text():\n    # The NULL_PLUG confuses numpy arrays, so assume that is not in\n    S = text().filter(lambda ss: NULL_PLUG not in ss)\n    return S\n\n\ndef _hashable():\n    S = floats() | integers() | _easy_text()\n    return S\n\n\nCAT_STGY = _easy_text if sp.CAT_KIND == \"U\" else binary\n\nF_MIN = np.nextafter(0, 1)\n\nRANGES = {\"linear\": (-1000, 1000), \"log\": (F_MIN, 1000), \"logit\": (F_MIN, np.nextafter(1, 0)), \"bilog\": (-100, 100)}\n\nSPACES = tuple(sorted(sp.SPACE_DICT.keys()))\n\n\n@composite\ndef space_vars(draw, max_values=5):\n    \"\"\"Build composite strategy for random API calls.\"\"\"\n    type_ = draw(sampled_from(SPACES))\n    use_values = draw(booleans())\n\n    if type_ == \"real\":\n        warp = draw(sampled_from((\"linear\", \"log\", 
\"logit\", \"bilog\")))\n        min_val, max_val = RANGES[warp]\n        if use_values:\n            # Generating unique values to ensure that always have more than 2\n            # unique values, but code is designed to accept non-unique values\n            # arrays as long as more than 2 non-unique. Could generalize this.\n            values = draw(lists(floats(min_val, max_val), min_size=2, max_size=max_values, unique=True))\n            D = {\"type\": type_, \"space\": warp, \"values\": values}\n        else:\n            range_ = tuple(sorted(draw(lists(floats(min_val, max_val), min_size=2, max_size=2, unique=True))))\n            D = {\"type\": type_, \"space\": warp, \"range\": range_}\n    elif type_ == \"int\":\n        warp = draw(sampled_from((\"linear\", \"log\", \"bilog\")))\n        min_val, max_val = RANGES[warp]\n        # Must shrink these to next integers in range to keep hypothesis happy\n        min_val = int(np.ceil(min_val))\n        max_val = int(np.floor(max_val))\n        if use_values:\n            values = draw(lists(integers(min_val, max_val), min_size=2, max_size=max_values, unique=True))\n            D = {\"type\": type_, \"space\": warp, \"values\": values}\n        else:\n            range_ = tuple(sorted(draw(lists(integers(min_val, max_val), min_size=2, max_size=2, unique=True))))\n            D = {\"type\": type_, \"space\": warp, \"range\": range_}\n    elif type_ == \"bool\":\n        D = {\"type\": type_}\n    elif type_ == \"cat\" or type_ == \"ordinal\":\n        values = draw(lists(CAT_STGY(), min_size=2, max_size=max_values, unique=True))\n        # This assume is needed because np.unique has bug for null plug\n        # .. >>> np.unique([u'', u'\\x00'])\n        # .. 
array([u''], dtype='<U1')\n        assume(len(np.unique(values)) == len(values))\n        D = {\"type\": type_, \"values\": values}\n    else:\n        assert False\n\n    return D\n\n\n@composite\ndef space_configs(draw, max_vars=5, max_len=5, allow_missing=False, unique_y=False):\n    meta = draw(dictionaries(text(), space_vars(), min_size=1, max_size=max_vars))\n\n    S = sp.JointSpace(meta)\n    lower, upper = S.get_bounds().T\n\n    D = sum(len(var[\"values\"]) if var[\"type\"] in (\"cat\", \"ordinal\") else 1 for var in meta.values())\n\n    # Let's draw warped variable because that will be a lot easier\n    N = draw(integers(min_value=0, max_value=max_len))\n    X_w = draw(arrays(dtype=float, shape=(N, D), elements=floats(min_value=0.0, max_value=1.0)))\n    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)\n    X = S.unwarp(X_w)\n\n    # Draw output too in case we want it\n    y_elements = floats(allow_infinity=False, allow_nan=allow_missing)\n    y = draw(arrays(dtype=float, shape=(N,), elements=y_elements, unique=unique_y))\n\n    # Draw the fixed vars\n    X_fixed_w = draw(arrays(dtype=float, shape=(1, D), elements=floats(min_value=0.0, max_value=1.0)))\n    X_fixed_w = linear_rescale(X_fixed_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)\n    X_fixed, = S.unwarp(X_fixed_w)\n\n    # Make fixed_vars a subset of all vars.\n    keep_in_fixed = draw(frozensets(sampled_from(tuple(X_fixed.keys()))))\n    X_fixed = {k: X_fixed[k] for k in keep_in_fixed}\n\n    return meta, X, y, X_fixed\n\n\n_test_cases = _easy_text\n\n\ndef perf_dataarrays(min_trial=1):\n    dims = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)\n    # Don't get too close to infinity because that can also create issues and isn't supported\n    elements = floats(allow_nan=False, min_value=-1e300, max_value=1e300)\n\n    ref = RANDOM_SEARCH + ARG_DELIM\n    method_names = from_regex(\"^%s[A-Z]*\" % ref) | text()\n    method_st = xr_coords(elements=method_names).filter(lambda L: 
any(ss.startswith(ref) for ss in L))\n\n    coords_st = {\n        ITER: simple_coords(min_side=1),\n        SUGGEST: simple_coords(min_side=1),\n        TRIAL: simple_coords(min_side=min_trial),\n        METHOD: method_st,\n    }\n    S = fixed_dataarrays(dims, dtype=np.float_, elements=elements, coords_elements=_test_cases(), coords_st=coords_st)\n    return S\n"
  },
  {
    "path": "test/util_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nimport random as pyrandom\nimport shlex\n\nfrom hypothesis import assume, given\nfrom hypothesis.strategies import floats, integers, iterables, lists, text\n\nfrom bayesmark import util as bobm_util\nfrom util import _hashable\n\n\ndef some_mock_f(x):\n    \"\"\"Some arbitrary deterministic test function.\n    \"\"\"\n    random_stream = pyrandom.Random(hash(x))\n    y = random_stream.gauss(0, 1)\n    return y\n\n\n@given(_hashable(), lists(_hashable()))\ndef test_in_or_none(x, L):\n    val = bobm_util.in_or_none(x, L)\n    assert isinstance(val, bool)\n    assert val == (x in L)\n    assert val == (x in set(L))\n\n\n@given(_hashable())\ndef test_in_or_none_on_none(x):\n    val = bobm_util.in_or_none(x, None)\n    assert isinstance(val, bool)\n    assert val\n\n\n@given(lists(_hashable()))\ndef test_in_or_none_self(L):\n    for xx in L:\n        val = bobm_util.in_or_none(xx, L)\n        assert isinstance(val, bool)\n        assert val\n\n\n@given(lists(_hashable()))\ndef test_all_unique(L):\n    bobm_util.all_unique(L)\n\n\n@given(lists(integers(), unique=True) | lists(text(), unique=True) | lists(floats(), unique=True))\ndef test_strict_sorted(L):\n    bobm_util.strict_sorted(L)\n\n\n@given(integers(-5, 1000))\ndef test_range_str(stop):\n    list(bobm_util.range_str(stop))\n\n\n@given(text(), lists(text()))\ndef test_str_join_safe(delim, str_vec):\n   
 assume(not any(delim in ss for ss in str_vec))\n    bobm_util.str_join_safe(delim, str_vec, append=False)\n\n\n@given(text(), lists(text()), lists(text()))\ndef test_str_join_safe_append(delim, str_vec0, str_vec):\n    assume(not any(delim in ss for ss in str_vec0))\n    assume(not any(delim in ss for ss in str_vec))\n\n    start = bobm_util.str_join_safe(delim, str_vec0, append=False)\n    bobm_util.str_join_safe(delim, [start] + str_vec, append=True)\n\n\n@given(lists(text()))\ndef test_shell_join(argv):\n    cmd = bobm_util.shell_join(argv, delim=\" \")\n\n    assert shlex.split(cmd) == list(argv)\n\n\n@given(text(), text(min_size=1))\ndef test_chomp(str_val, ext):\n    bobm_util.chomp(str_val + ext, ext)\n\n\n@given(iterables(_hashable()))\ndef test_preimage_func(x):\n    bobm_util.preimage_func(some_mock_f, x)\n"
  },
  {
    "path": "test/xr_util_test.py",
    "content": "# Copyright (c) 2019 Uber Technologies, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom collections import OrderedDict\nfrom itertools import product\n\nimport xarray as xr\nfrom hypothesis import assume, given, settings\nfrom hypothesis.strategies import dictionaries, floats, integers, just, sampled_from, tuples\nfrom hypothesis_gufunc.extra.xr import (\n    _hashable,\n    dataarrays,\n    datasets,\n    fixed_datasets,\n    simple_dataarrays,\n    subset_lists,\n    vars_to_dims_dicts,\n    xr_vars,\n)\n\nimport bayesmark.xr_util as xru\n\nxr_fill = _hashable\n\n\ndef intersect_seq(L):\n    if len(L) == 0:\n        return set([])\n\n    S = set(L[0])\n    for xx in L[1:]:\n        S = S & set(xx)\n    return S\n\n\ndef ds_vars_dims():\n    def build_it(vars_to_dims_):\n        all_dims = list(set(sum((list(dd) for dd in vars_to_dims_.values()), [])))\n\n        ds = fixed_datasets(vars_to_dims_)\n        vars_ = subset_lists(list(vars_to_dims_.keys()))\n        dims = subset_lists(all_dims)\n        return tuples(ds, vars_, dims)\n\n    vars_to_dims_st = vars_to_dims_dicts()\n\n    S = vars_to_dims_st.flatmap(build_it)\n    return S\n\n\ndef ds_vars_dims_mixed():\n    def build_it(vars_to_dims_):\n        all_dims = list(set(sum((list(dd) for dd in vars_to_dims_.values()), [])))\n\n        ds = fixed_datasets(vars_to_dims_)\n\n        dims = subset_lists(all_dims)\n\n        vars_ = sampled_from(list(vars_to_dims_.keys()))\n        
vars_dict = dictionaries(vars_, dims, dict_class=OrderedDict)\n        vars_dict = vars_dict.map(OrderedDict.items).map(list)\n\n        return tuples(ds, vars_dict, just(all_dims))\n\n    vars_to_dims_st = vars_to_dims_dicts(min_vars=0, min_dims=0)\n\n    S = vars_to_dims_st.flatmap(build_it)\n    return S\n\n\n@given(simple_dataarrays((\"foo\", \"bar\", \"baz\")) | dataarrays() | dataarrays(coords_elements=floats()), integers(0, 3))\ndef test_is_simple_coords(da, min_side):\n    xru.is_simple_coords(da.coords, min_side=min_side)\n\n\n@given(simple_dataarrays((\"foo\", \"bar\", \"baz\")))\ndef test_is_simple_coords_pass(da):\n    simple = xru.is_simple_coords(da.coords)\n    assert simple\n\n\n@given(ds_vars_dims(), xr_fill())\ndef test_ds_like(args, fill):\n    ref, vars_, dims = args\n\n    xru.ds_like(ref, vars_, dims, fill=fill)\n\n\n@given(ds_vars_dims_mixed(), xr_fill())\ndef test_ds_like_mixed(args, fill):\n    ref, vars_, dims = args\n\n    xru.ds_like_mixed(ref, vars_, dims, fill=fill)\n\n\n@given(xr_vars(), dataarrays())\ndef test_only_dataarray(var_, da):\n    assume(var_ not in da.dims)\n\n    ds = xr.Dataset({var_: da})\n\n    xru.only_dataarray(ds)\n\n\n@given(datasets())\ndef test_coord_compat(ds):\n    all_dims = [ds[kk].dims for kk in ds]\n    common_dims = sorted(intersect_seq(all_dims))\n    da_seq = [ds[kk] for kk in ds]\n\n    compat = xru.coord_compat(da_seq, common_dims)\n    assert compat\n\n\n@given(datasets())\ndef test_coord_compat_false(ds):\n    all_dims = [ds[kk].dims for kk in ds]\n    common_dims = sorted(intersect_seq(all_dims))\n    da_seq = [ds[kk] for kk in ds]\n\n    assume(len(da_seq) > 0)\n    assume(len(da_seq[0].dims) > 0)\n\n    da = da_seq[0]\n    kk = da.dims[0]\n    da_seq[0] = da.assign_coords(**{kk: range(da.sizes[kk])})\n\n    xru.coord_compat(da_seq, common_dims)\n\n\n@given(dataarrays(min_dims=1, max_dims=1))\ndef test_da_to_string(da):\n    xru.da_to_string(da)\n\n\n@given(dataarrays(min_side=0, min_dims=0), 
integers(1, 3))\n@settings(deadline=None)\ndef test_da_concat(da, n):\n    assume(n < len(da.dims))\n\n    da_dict, keys_to_slice = da_split(da, n)\n    assume(len(da_dict) > 0)\n    assert len(keys_to_slice) == n\n\n    xru.da_concat(da_dict, dims=keys_to_slice)\n\n\ndef da_split(da, n):\n    assert 0 < n\n    assert n <= len(da.dims)\n\n    keys_to_slice = da.dims[-n:]\n    da_dict = {}\n    vals = [da.coords[kk].values.tolist() for kk in keys_to_slice]\n    for vv in product(*vals):\n        lookup = dict(zip(keys_to_slice, vv))\n        da_dict[tuple(vv)] = da.sel(lookup, drop=True)\n    return da_dict, keys_to_slice\n\n\n@given(datasets(min_side=1, min_dims=1), integers(1, 3))\n@settings(deadline=None)\ndef test_ds_concat(ds, n):\n    all_dims = [ds[kk].dims for kk in ds]\n    common_dims = sorted(intersect_seq(all_dims))\n\n    n = min([n, len(common_dims) - 1])\n    assume(0 < n)\n\n    keys_to_slice = common_dims[:n]\n    ds_dict = {}\n    vals = [ds.coords[kk].values.tolist() for kk in keys_to_slice]\n    for vv in product(*vals):\n        lookup = dict(zip(keys_to_slice, vv))\n        ds_dict[vv] = ds.sel(lookup, drop=True)\n\n    xru.ds_concat(ds_dict, dims=keys_to_slice)\n"
  },
  {
    "path": "test.sh",
    "content": "#!/bin/bash\n\nset -ex\nset -o pipefail\n\n# Set conda paths\nexport CONDA_PATH=./tmp/conda\nexport CONDA_ENVS=env\n\n# Sometime pip PIP_REQUIRE_VIRTUALENV has issues with conda\nexport PIP_REQUIRE_VIRTUALENV=false\n\nPY_VERSIONS=( \"3.6\" \"3.7\" )\n\n# Handy to know what we are working with\ngit --version\n\n# Cleanup workspace, src for any old -e installs\ngit clean -x -f -d\nrm -rf src/\n\n# Install miniconda\nif command -v conda 2>/dev/null; then\n    echo \"Conda already installed\"\nelse\n    # We need to use miniconda since we can't figure out ho to install py3.6 in\n    # this env image. We could also use Miniconda3-latest-Linux-x86_64.sh but\n    # pinning version to make reprodicible.\n    echo \"Installing miniconda\"\n    if [[ \"$OSTYPE\" == \"darwin\"* ]]; then\n        # In future let's also try, for reprodicibility:\n        # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-MacOSX-x86_64.sh;\n        curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh;\n    else\n        # In future let's also try, for reprodicibility:\n        # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh;\n        curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh;\n    fi\n    chmod +x ./miniconda.sh\n    ./miniconda.sh -b -p $CONDA_PATH\n    rm ./miniconda.sh\nfi\nexport PATH=$CONDA_PATH/bin:$PATH\n\n# Setup env just for installing pre-commit to run hooks on all files\nrm -rf \"$CONDA_ENVS\"\nENV_PATH=\"${CONDA_ENVS}/bobm_commit_hooks\"\nconda create -y -q -p $ENV_PATH python=3.6\necho $ENV_PATH\nsource activate $ENV_PATH\npython --version\npip freeze | sort\n# not listing 2nd order deps here, but probably ok\npip install -r requirements/tools.txt\n# Now run hooks on all files, don't need to install hooks since run directly\npre-commit run --all-files\n# Now can leave env with  pre-commit\nconda 
deactivate\n# Also check no changes to files by hooks\ntest -z \"$(git diff)\"\n# clean up for good measure, but need to keep miniconda tmp folder\ngit clean -x -f -d --exclude=tmp\n\n# Tool to get compare only the package names in pip file\n# On mac, sed -r needs to be seed -E\nnameonly () { grep -i '^[a-z0-9]' | sed -E \"s/([^=]*)==.*/\\1/g\" | tr _ - | sort -f; }\nnameveronly () { grep -i '^[a-z0-9]' | awk '{print $1}' | tr _ - | sort -f; }\npipcheck () { cat $@ | grep -i '^[a-z0-9]' | awk '{print $1}' | sed -f requirements/pipreqs_edits.sed | sort -f | uniq >ask.log && pip freeze | sed -f requirements/pipreqs_edits.sed | sort -f >got.log && diff -i ask.log got.log; }\n\n# Now test the deps\nENV_PATH=\"${CONDA_ENVS}/deps_test\"\nconda create -y -q -p $ENV_PATH python=3.6\necho $ENV_PATH\nsource activate $ENV_PATH\npython --version\npip freeze | sort\n\n# Install all requirements, make sure they are mutually compatible\npip install -r requirements/base.txt\npipcheck requirements/base.txt\n\n# Install package\npython setup.py install\npipcheck requirements/base.txt requirements/self.txt\n\npip install -r requirements/optimizers.txt\npipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt\n\npip install -r requirements/test.txt\npipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt requirements/test.txt\n\npip install -r requirements/ipynb.txt\npipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt\npip install -r requirements/docs.txt\npipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt requirements/docs.txt\n\npip install -r requirements/tools.txt\n\n# Make sure .in file corresponds to what is imported\nnameonly <requirements/base.in >ask.log\npipreqs bayesmark/  --ignore bayesmark/builtin_opt/ --savepath requirement_chk.in\nsed -f requirements/pipreqs_edits.sed 
requirement_chk.in | nameonly >got.log\ndiff ask.log got.log\n\nnameonly <requirements/test.in >ask.log\npipreqs test/ --savepath requirement_chk.in\nsed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log\ndiff ask.log got.log\n\nnameonly <requirements/optimizers.in >ask.log\npipreqs bayesmark/builtin_opt/ --savepath requirement_chk.in\nsed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log\ndiff ask.log got.log\n\nnameonly <requirements/docs.in >ask.log\npipreqs docs/ --savepath requirement_chk.in\nsed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log\ndiff ask.log got.log\n\nnameonly <requirements/ipynb.in >ask.log\njupyter nbconvert --to script notebooks/*.ipynb\npipreqs notebooks/ --savepath requirement_chk.in\nsed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log\ndiff ask.log got.log\n\n# Make sure txt file corresponds to pip compile\n# First copy the originals\nfor f in requirements/*.txt; do cp -- \"$f\" \"${f%.txt}.chk\"; done\n# Now re-compile\n# no-upgrade means that by default it keeps the 2nd order dependency versions already in the requirements txt file\n# (otherwise it brings it to the very latest available version which often causes issues).\npip-compile-multi -o txt --no-upgrade\n\nnameveronly <requirements/base.chk >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/base.txt | nameveronly >got.log\ndiff ask.log got.log\n\nnameveronly <requirements/test.chk >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/test.txt | nameveronly >got.log\ndiff ask.log got.log\n\nnameveronly <requirements/optimizers.chk | sed -f requirements/pipreqs_edits.sed >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/optimizers.txt | nameveronly >got.log\ndiff ask.log got.log\n\nnameveronly <requirements/ipynb.chk | sed -f requirements/pipreqs_edits.sed >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/ipynb.txt | nameveronly >got.log\ndiff 
ask.log got.log\n\nnameveronly <requirements/docs.chk | sed -f requirements/pipreqs_edits.sed >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/docs.txt | nameveronly >got.log\ndiff ask.log got.log\n\nnameveronly <requirements/tools.chk | sed -f requirements/pipreqs_edits.sed >ask.log\nsed -f requirements/pipreqs_edits.sed requirements/tools.txt | nameveronly >got.log\ndiff ask.log got.log\n\n# Deactivate virtual environment\nconda deactivate\n\n# Set up environments for all Python versions and loop over them\nrm -rf \"$CONDA_ENVS\"\nfor i in \"${PY_VERSIONS[@]}\"\ndo\n    # Now test the deps\n    ENV_PATH=\"${CONDA_ENVS}/unit_test\"\n    conda create -y -q -p $ENV_PATH python=$i\n    echo $ENV_PATH\n    source activate $ENV_PATH\n    python --version\n    pip freeze | sort\n\n    # Install all requirements\n    pip install -r requirements/test.txt\n\n    # Install package\n    python setup.py install\n\n    # Run tests\n    pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html\n\n    conda deactivate\ndone\n"
  },
  {
    "path": "tools/archive_branch.sh",
    "content": "#!/bin/bash\n\nset -ex\nset -o pipefail\n\nDATE=$(date +\"%Y%m%d\")\nTAGNAME=archive/$DATE-$1\n\n# Fail if there are uncommitted changes or untracked files\ntest -z \"$(git status --porcelain)\"\n\n# Fail if origin and local differ\ngit diff $1 origin/$1 --quiet\n\n# Prune remotes for good measure\ngit remote prune origin\n\ngit checkout $1\ngit tag -a $TAGNAME -m \"archived branch $1 on $DATE\"\ngit checkout master\ngit push origin $TAGNAME\n\n# Make sure we tagged correctly for good measure\ndiff <(git rev-list $TAGNAME -n 1) <(git rev-parse $1)\ngit ls-remote --tags origin | grep $(git rev-parse $1)\n\ngit branch -D $1\ngit push origin --delete $1\n\necho \"cleaned up\"\n"
  },
  {
    "path": "tools/deploy.sh",
    "content": "#!/bin/bash\n#\n# Note that\n# UUID=$(uuidgen)\n# works on Mac OS by default, but requires installation on linux.\n\nset -ex\nset -o pipefail\n\n# Script arguments\nREMOTE=$1\nBRANCH=$2\nPACKAGE=$3\nVERSION=$4\n\n# Check to make sure we have keys setup right before we start\ngit push --dry-run\n\n# Check versions are there, this is a crude way to do it but it works\ngrep \"^$PACKAGE==$VERSION\\$\" requirements/self.txt\ngrep '^__version__ = \"'$VERSION'\"$' bayesmark/__init__.py\ngrep 'version=\"'$VERSION'\",$' setup.py\n\n# Where envs go\nENVS=~/envs\n# Which python version this uses\nPY=python3.7\n# Which env contains twine and py version we use\nTWINE_ENV=twine_env\n# Where to run tar ball tests from\nTEST_DIR=~/tmp/deploy_tests\n\nmkdir -p $TEST_DIR\n\n# Get the dir\nREPO_DIR=$(pwd)\ngit checkout $BRANCH\n\n# Fail if untracked files and clean\ntest -z \"$(git status --porcelain)\"\ngit clean -x -ff -d\n\n# Run tests locally and cleanup\n./integration_test_with_setup.sh\n./test.sh\ngit reset --hard HEAD\ngit clean -x -ff -d\ntest -z \"$(git status --porcelain)\"\n\n# push to remote and check\ngit push -u $REMOTE $BRANCH\ngit diff $BRANCH $REMOTE/$BRANCH --quiet\n\n# See if tests pass remote, TODO use travis CLI\nread -t 1 -n 10000 discard || true\nread -p \"Travis tests pass [y/n]? \" -r\nif [[ ! 
$REPLY =~ ^[Yy]$ ]]\nthen\n    exit 1\nfi\n\n# test tar ball\nsource $ENVS/$TWINE_ENV/bin/activate\n./build_wheel.sh\ntwine check dist/*\ndeactivate\ncd $TEST_DIR\nUUID=$(uuidgen)\nmkdir $UUID\ncd $UUID\nvirtualenv env --python=$PY\nsource ./env/bin/activate\npip install -r $REPO_DIR/requirements/test.txt\npip install $REPO_DIR/dist/*.tar.gz\ncp -r $REPO_DIR/test .\npytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings\ndeactivate\ncd $REPO_DIR\n# Cleanup since we will build again\ngit clean -x -ff -d\ntest -z \"$(git status --porcelain)\"\n\n# merge master\n# Fail if origin and local differ\ngit checkout $BRANCH\ngit diff master $REMOTE/master --quiet\ngit merge master --no-commit\n# Fail if not clean\ntest -z \"$(git status --porcelain)\"\n\n# merge to master\ngit checkout master\ngit merge $BRANCH --squash --no-commit\ngit status\nread -t 1 -n 10000 discard || true\nread -p \"Commit message (CTRL-C to abort): \"\ngit commit -m \"$REPLY\"\n# Fail if not clean\ntest -z \"$(git status --porcelain)\"\n\n# Run tests locally and cleanup\n./integration_test_with_setup.sh\n./test.sh\ngit reset --hard HEAD\ngit clean -x -ff -d\ntest -z \"$(git status --porcelain)\"\n\n# test tar ball\nsource $ENVS/$TWINE_ENV/bin/activate\n./build_wheel.sh\ntwine check dist/*\ndeactivate\ncd $TEST_DIR\nUUID=$(uuidgen)\nmkdir $UUID\ncd $UUID\nvirtualenv env --python=$PY\nsource ./env/bin/activate\npip install -r $REPO_DIR/requirements/test.txt\npip install $REPO_DIR/dist/*.tar.gz\ncp -r $REPO_DIR/test .\npytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings\ndeactivate\ncd $REPO_DIR\n\n# push to test pypi\nsource $ENVS/$TWINE_ENV/bin/activate\ntwine upload --repository-url https://test.pypi.org/legacy/ dist/*\ndeactivate\n\necho \"ready to run?\"\necho \"pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/\"\nread -p \"Enter when pypi has updated: \" -r\n\n# install and test\ncd $TEST_DIR\nUUID=$(uuidgen)\nmkdir $UUID\ncd $UUID\nvirtualenv env 
--python=$PY\nsource ./env/bin/activate\npip install -r $REPO_DIR/requirements/test.txt\npip install -r $REPO_DIR/requirements/ipynb.txt\npip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/\ncp $REPO_DIR/integration_test.sh .\ncp -r $REPO_DIR/notebooks .\ncp -r $REPO_DIR/example_opt_root .\n./integration_test.sh\ncp -r $REPO_DIR/test .\npytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings\ndeactivate\ncd $REPO_DIR\n\n# push to remote and check\ngit push $REMOTE master\ngit diff master $REMOTE/master --quiet\n\n# Show sha256sum in case we want to check against PyPI test, use || for Mac OS version\nsha256sum dist/* || shasum -a 256 dist/*\n\n# See if tests pass remote, TODO use travis CLI\nread -t 1 -n 10000 discard || true\nread -p \"Travis tests pass, and push to PyPI? This cannot be undone. [push/no]\" -r\nif [[ ! $REPLY == push ]]\nthen\n    exit 1\nfi\n\n# push to full pypi\nsource $ENVS/$TWINE_ENV/bin/activate\ntwine upload dist/*\ndeactivate\n\necho \"ready to run?\"\necho \"pip install $PACKAGE==$VERSION\"\nread -p \"Enter when pypi has updated: \" -r\n\n# install and test\ncd $TEST_DIR\nUUID=$(uuidgen)\nmkdir $UUID\ncd $UUID\nvirtualenv env --python=$PY\nsource ./env/bin/activate\npip install -r $REPO_DIR/requirements/test.txt\npip install -r $REPO_DIR/requirements/ipynb.txt\npip install $PACKAGE==$VERSION\ncp $REPO_DIR/integration_test.sh .\ncp -r $REPO_DIR/notebooks .\ncp -r $REPO_DIR/example_opt_root .\n./integration_test.sh\ncp -r $REPO_DIR/test .\npytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings\ndeactivate\ncd $REPO_DIR\n\n# clean and tag\ngit clean -x -ff -d\ntest -z \"$(git status --porcelain)\"\ngit tag -a v$VERSION -m \"$PACKAGE version $VERSION\"\ngit push $REMOTE v$VERSION\n\n# remind user to archive/delete branch\necho \"remember to delete branch $BRANCH, and update readthedocs.io\"\necho \"done\"\n"
  }
]