Repository: uber/bayesmark Branch: master Commit: 8c420e935718 Files: 102 Total size: 525.6 KB Directory structure: gitextract_vn3wrx6j/ ├── .coveragerc ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .secrets.baseline ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bayesmark/ │ ├── __init__.py │ ├── abstract_optimizer.py │ ├── builtin_opt/ │ │ ├── __init__.py │ │ ├── config.py │ │ ├── hyperopt_optimizer.py │ │ ├── nevergrad_optimizer.py │ │ ├── opentuner_optimizer.py │ │ ├── pysot_optimizer.py │ │ ├── random_optimizer.py │ │ └── scikit_optimizer.py │ ├── cmd_parse.py │ ├── constants.py │ ├── data.py │ ├── expected_max.py │ ├── experiment.py │ ├── experiment_aggregate.py │ ├── experiment_analysis.py │ ├── experiment_baseline.py │ ├── experiment_db_init.py │ ├── experiment_launcher.py │ ├── np_util.py │ ├── path_util.py │ ├── quantiles.py │ ├── random_search.py │ ├── serialize.py │ ├── signatures.py │ ├── sklearn_funcs.py │ ├── space.py │ ├── stats.py │ ├── util.py │ └── xr_util.py ├── build_wheel.sh ├── docs/ │ ├── .gitignore │ ├── Makefile │ ├── authors.rst │ ├── code.rst │ ├── conf.py │ ├── dummy.py │ ├── index.rst │ ├── readme.rst │ └── scoring.rst ├── example_opt_root/ │ ├── config.json │ ├── flaky_optimizer.py │ ├── hyperopt_optimizer.py │ ├── nevergrad_optimizer.py │ ├── opentuner_optimizer.py │ ├── pysot_optimizer.py │ ├── random_optimizer.py │ └── scikit_optimizer.py ├── integration_test.sh ├── integration_test_with_setup.sh ├── notebooks/ │ ├── dummy.py │ ├── plot_mean_score.ipynb │ └── plot_test_case.ipynb ├── requirements/ │ ├── base.in │ ├── base.txt │ ├── docs.in │ ├── docs.txt │ ├── ipynb.in │ ├── ipynb.txt │ ├── optimizers.in │ ├── optimizers.txt │ ├── pipreqs_edits.sed │ ├── self.txt │ ├── test.in │ ├── test.txt │ ├── tools.in │ └── tools.txt ├── setup.py ├── test/ │ ├── data_test.py │ ├── dummy.py │ ├── expected_max_test.py │ ├── experiment_aggregate_test.py │ ├── experiment_analysis_test.py │ ├── 
experiment_baseline_test.py │ ├── experiment_db_init_test.py │ ├── experiment_launcher_test.py │ ├── experiment_test.py │ ├── hypothesis_util.py │ ├── np_util_test.py │ ├── quantiles_test.py │ ├── random_search_test.py │ ├── serialize_test.py │ ├── signatures_test.py │ ├── sklearn_funcs_test.py │ ├── space_test.py │ ├── stats_test.py │ ├── util.py │ ├── util_test.py │ └── xr_util_test.py ├── test.sh └── tools/ ├── archive_branch.sh └── deploy.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveragerc ================================================ [report] exclude_lines = pragma: no cover @abstract ValueError NotImplementedError assert _error def main() pragma: io pragma: main pragma: validator ================================================ FILE: .gitignore ================================================ .* !.gitignore !.gitmodules !.flake8 !.coveragerc !.pre-commit-config.yaml !.secrets.baseline !.travis.yml !.readthedocs.yml # For wheels bayesmark/version.py dist/ # Java *.class # Intellij *.iml *.iws # Gradle build/ classes/ log/ tmp/ /out/ ins.xml *.log # Python *.py[co] *.egg* .cache .DS_Store # env env/ # Emacs *~ .\#* \#*\# # *ipynb .ipynb_checkpoints *.png *.aux # Hypothesis tests/src src/ # Coverage htmlcov/ # for the test.sh pip compile check requirements/*.chk requirement_chk.in ================================================ FILE: .pre-commit-config.yaml ================================================ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v1.2.3 hooks: - id: flake8 exclude: ^(docs/*) args: [--max-line-length=120, --ignore=E203] - id: check-byte-order-marker - id: check-case-conflict - id: check-merge-conflict - id: end-of-file-fixer - id: forbid-new-submodules - id: mixed-line-ending args: [--fix=lf] - id: trailing-whitespace - id: debug-statements - id: check-json - id: pretty-format-json args: 
[--autofix, --indent=4] - id: check-yaml - id: sort-simple-yaml - repo: https://github.com/ambv/black rev: 19.3b0 hooks: - id: black args: [-l 120, --target-version=py36] - repo: https://github.com/asottile/seed-isort-config rev: v1.2.0 hooks: - id: seed-isort-config args: [--application-directories=test] - repo: https://github.com/pre-commit/mirrors-isort rev: v4.3.4 hooks: - id: isort language_version: python3 args: [-w 120, -m 3, -tc, --project=bayesmark] - repo: https://github.com/jumanjihouse/pre-commit-hooks rev: 1.11.0 hooks: - id: require-ascii - id: script-must-have-extension - id: forbid-binary - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.1.6 hooks: - id: forbid-crlf - id: forbid-tabs - repo: https://github.com/kynan/nbstripout rev: fe155a55548c61e4eb53522e57921077acf82c00 # pragma: allowlist secret hooks: - id: nbstripout exclude: ^notebooks/.*\.out\.ipynb$ - repo: https://github.com/Yelp/detect-secrets rev: v0.12.5 hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.4.1 # Use the ref you want to point at hooks: - id: python-no-eval - id: python-check-blanket-noqa - repo: https://github.com/asottile/yesqa rev: v0.0.11 hooks: - id: yesqa - repo: https://github.com/myint/eradicate rev: 522ed7ce2da82d33b3e2331bf50d4671c5a5af9a # pragma: allowlist secret hooks: - id: eradicate exclude: docs/conf.py ================================================ FILE: .readthedocs.yml ================================================ # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # Build documentation with MkDocs #mkdocs: # configuration: mkdocs.yml # Optionally build your docs in additional formats such as PDF and ePub formats: all # Optionally set the version of Python and requirements 
required to build your docs python: version: 3.6 install: - requirements: requirements/docs.txt ================================================ FILE: .secrets.baseline ================================================ { "exclude": { "files": null, "lines": null }, "generated_at": "2019-09-18T01:04:54Z", "plugins_used": [ { "name": "AWSKeyDetector" }, { "name": "ArtifactoryDetector" }, { "base64_limit": 4.5, "name": "Base64HighEntropyString" }, { "name": "BasicAuthDetector" }, { "hex_limit": 3, "name": "HexHighEntropyString" }, { "name": "KeywordDetector" }, { "name": "PrivateKeyDetector" }, { "name": "SlackDetector" }, { "name": "StripeDetector" } ], "results": {}, "version": "0.12.5" } ================================================ FILE: .travis.yml ================================================ language: python python: - "3.6" before_script: - "curl -H 'Cache-Control: no-cache' https://raw.githubusercontent.com/fossas/fossa-cli/master/install.sh | sudo bash" script: - ./integration_test_with_setup.sh - ./test.sh - cat requirements/*.txt >requirements.txt - '[ ! -z "$FOSSA_API_KEY" ] && (fossa init && fossa analyze) || true' ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include requirements/base.in include requirements/optimizers.in include requirements/ipynb.in include LICENSE include README.rst ================================================ FILE: README.rst ================================================ Installation ============ This project provides a benchmark framework to easily compare Bayesian optimization methods on real machine learning tasks. This project is experimental and the APIs are not considered stable. This Bayesian optimization (BO) benchmark framework requires a few easy steps for setup. It can be run either on a local machine (in serial) or prepare a *commands file* to run on a cluster as parallel experiments (dry run mode). Only ``Python>=3.6`` is officially supported, but older versions of Python likely work as well. The core package itself can be installed with: .. code-block:: bash pip install bayesmark However, to also require installation of all the "built in" optimizers for evaluation, run: .. code-block:: bash pip install bayesmark[optimizers] It is also possible to use the same pinned dependencies we used in testing by `installing from the repo <#install-in-editable-mode>`_. Building an environment to run the included notebooks can be done with: .. 
code-block:: bash pip install bayesmark[notebooks] Or, ``bayesmark[optimizers,notebooks]`` can be used. A quick example of running the benchmark is `here <#example>`_. The instructions are used to generate results as below: .. image:: https://user-images.githubusercontent.com/28273671/66338456-02516b80-e8f6-11e9-8156-2e84e04cf6fe.png :width: 95 % Non-pip dependencies -------------------- To be able to install ``opentuner`` some system level (non-pip) dependencies must be installed. This can be done with: .. code-block:: bash sudo apt-get install libsqlite3-0 sudo apt-get install libsqlite3-dev On Ubuntu, this results in: .. code-block:: console > dpkg -l | grep libsqlite ii libsqlite3-0:amd64 3.11.0-1ubuntu1 amd64 SQLite 3 shared library ii libsqlite3-dev:amd64 3.11.0-1ubuntu1 amd64 SQLite 3 development files The environment should now all be setup to run the BO benchmark. Running ======= Now we can run each step of the experiments. First, we run all combinations and then run some quick commands to analyze the output. Launch the experiments ---------------------- The experiments are run using the experiment launcher, which has the following interface: .. code-block:: usage: bayesmark-launch [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] [-u UUID] [-dr DATA_ROOT] [-b DB] [-o OPTIMIZER [OPTIMIZER ...]] [-d DATA [DATA ...]] [-c [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...]] [-m [{acc,mae,mse,nll} ...]] [-n N_CALLS] [-p N_SUGGEST] [-r N_REPEAT] [-nj N_JOBS] [-ofile JOBS_FILE] The arguments are: .. 
code-block:: -h, --help show this help message and exit -dir DB_ROOT, -db-root DB_ROOT root directory for all benchmark experiments output -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT Directory with optimization wrappers -v, --verbose print the study logs to console -u UUID, --uuid UUID length 32 hex UUID for this experiment -dr DATA_ROOT, --data-root DATA_ROOT root directory for all custom csv files -b DB, --db DB database ID of this benchmark experiment -o OPTIMIZER [OPTIMIZER ...], --opt OPTIMIZER [OPTIMIZER ...] optimizers to use -d DATA [DATA ...], --data DATA [DATA ...] data sets to use -c, --classifier [{DT,MLP-adam,MLP-sgd,RF,SVM,ada,kNN,lasso,linear} ...] classifiers to use -m, --metric [{acc,mae,mse,nll} ...] scoring metric to use -n N_CALLS, --calls N_CALLS number of function evaluations -p N_SUGGEST, --suggestions N_SUGGEST number of suggestions to provide in parallel -r N_REPEAT, --repeat N_REPEAT number of repetitions of each study -nj N_JOBS, --num-jobs N_JOBS number of jobs to put in the dry run file, the default 0 value disables dry run (real run) -ofile JOBS_FILE, --jobs-file JOBS_FILE a jobs file with all commands to be run The output files will be placed in ``[DB_ROOT]/[DBID]``. If ``DBID`` is not specified, it will be a randomly created subdirectory with a new name to avoid overwriting previous experiments. The path to ``DBID`` is shown at the beginning of ``stdout`` when running ``bayesmark-launch``. In general, let the launcher create and setup ``DBID`` unless you are appending to a previous experiment, in which case, specify the existing ``DBID``. The launcher's sequence of commands can be accessed programmatically via :func:`.experiment_launcher.gen_commands`. The individual experiments can be launched programmatically via :func:`.experiment.run_sklearn_study`. 
Selecting the experiments ^^^^^^^^^^^^^^^^^^^^^^^^^ A list of optimizers, classifiers, data sets, and metrics can be listed using the ``-o``/``-c``/``-d``/``-m`` commands, respectively. If not specified, the program launches all possible options. Selecting the optimizer ^^^^^^^^^^^^^^^^^^^^^^^ A few different open source optimizers have been included as an example and are considered the "built-in" optimizers. The original repos are shown in the `Links <#links>`_. The optimizer argument ``-o`` allows a list containing the "built-in" optimizers: .. code-block:: "HyperOpt", "Nevergrad-OnePlusOne", "OpenTuner-BanditA", "OpenTuner-GA", "OpenTuner-GA-DE", "PySOT", "RandomSearch", "Scikit-GBRT-Hedge", "Scikit-GP-Hedge", "Scikit-GP-LCB" or, one can specify a user-defined optimizer. The class containing an optimizer conforming to the API must be found in the folder specified by ``--opt-root``. Additionally, a configuration defining each optimizer must be defined in ``[OPT_ROOT]/config.json``. The ``--opt-root`` and ``config.json`` may be omitted if only built-in optimizers are used. Additional details for providing a new optimizer are found in `adding a new optimizer <#adding-a-new-optimizer>`_. Selecting the data set ^^^^^^^^^^^^^^^^^^^^^^ By default, this benchmark uses the `sklearn example data sets `_ as the "built-in" data sets for use in ML model tuning problems. The data argument ``-d`` allows a list containing the "built-in" data sets: .. code-block:: "breast", "digits", "iris", "wine", "boston", "diabetes" or, it can refer to a custom ``csv`` file, which is the name of a file in the folder specified by ``--data-root``. It also follows the convention that regression data sets start with ``reg-`` and classification data sets start with ``clf-``. For example, the classification data set in ``[DATA_ROOT]/clf-foo.csv`` is specified with ``-d clf-foo``.
The ``csv`` file can be anything readable by pandas, but we assume the final column is the target and all other columns are features. The target column should be integer for classification data and float for regression. The features should be float (or ``str`` for categorical variable columns). See ``bayesmark.data.load_data`` for more information. Dry run for cluster jobs ^^^^^^^^^^^^^^^^^^^^^^^^ It is also possible to do a "dry run" of the launcher by specifying a value for ``--num-jobs`` greater than zero. For example, if ``--num-jobs 50`` is provided, a text file listing 50 commands to run is produced, with one command (job) per line. This is useful when preparing a list of commands to run later on a cluster. A dry run will generate a command file (e.g., ``jobs.txt``) like the following (with a meta-data header). Each line corresponds to a command that can be used as a job on a different worker: .. code-block:: # running: {'--uuid': None, '-db-root': '/foo', '--opt-root': '/example_opt_root', '--data-root': None, '--db': 'bo_example_folder', '--opt': ['RandomSearch', 'PySOT'], '--data': None, '--classifier': ['SVM', 'DT'], '--metric': None, '--calls': 15, '--suggestions': 1, '--repeat': 3, '--num-jobs': 50, '--jobs-file': '/jobs.txt', '--verbose': False, 'dry_run': True, 'rev': '9a14ef2', 'opt_rev': None} # cmd: python bayesmark-launch -n 15 -r 3 -dir foo -o RandomSearch PySOT -c SVM DT -nj 50 -b bo_example_folder job_e2b63a9_00 bayesmark-exp -c SVM -d diabetes -o PySOT -u 079a155f03095d2ba414a5d2cedde08c -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d boston -o RandomSearch -u 400e4c0be8295ad59db22d9b5f31d153 -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d digits -o RandomSearch -u fe73a2aa960a5e3f8d78bfc4bcf51428 -m acc -n 15 -p 1 -dir foo -b bo_example_folder job_e2b63a9_01 bayesmark-exp -c DT -d diabetes -o PySOT -u db1d9297948554e096006c172a0486fb -m mse -n 15 -p 1 -dir foo -b bo_example_folder &&
bayesmark-exp -c SVM -d boston -o RandomSearch -u 7148f690ed6a543890639cc59db8320b -m mse -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c SVM -d breast -o PySOT -u 72c104ba1b6d5bb8a546b0064a7c52b1 -m nll -n 15 -p 1 -dir foo -b bo_example_folder job_e2b63a9_02 bayesmark-exp -c SVM -d iris -o PySOT -u cc63b2c1e4315a9aac0f5f7b496bfb0f -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d breast -o RandomSearch -u aec62e1c8b5552e6b12836f0c59c1681 -m nll -n 15 -p 1 -dir foo -b bo_example_folder && bayesmark-exp -c DT -d digits -o RandomSearch -u 4d0a175d56105b6bb3055c3b62937b2d -m acc -n 15 -p 1 -dir foo -b bo_example_folder ... This package does not have built in support for deploying these jobs on a cluster or cloud environment (e.g., AWS). The UUID argument ^^^^^^^^^^^^^^^^^ The ``UUID`` is a 32-char hex string used as a master random seed which we use to draw random seeds for the experiments. If ``UUID`` is not specified a version 4 UUID is generated. The used UUID is displayed at the beginning of ``stdout``. In general, the ``UUID`` should not be specified/re-used except for debugging because it violates the assumption that the experiment UUIDs are unique. Aggregate results ----------------- Next, to aggregate all the experiment files into combined (json) files we need to run the aggregation command: .. code-block:: usage: bayesmark-agg [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB [-rv] The arguments are: ..
code-block:: -h, --help show this help message and exit -dir DB_ROOT, -db-root DB_ROOT root directory for all benchmark experiments output -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT Directory with optimization wrappers -v, --verbose print the study logs to console -b DB, --db DB database ID of this benchmark experiment -rv, --ravel ravel all studies to store batch suggestions as if they were serial The ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``. The result aggregation can be done programmatically via :func:`.experiment_aggregate.concat_experiments`. Analyze and summarize results ----------------------------- Finally, to run a statistical analysis presenting a summary of the experiments we run .. code-block:: usage: bayesmark-anal [-h] [-dir DB_ROOT] [-odir OPTIMIZER_ROOT] [-v] -b DB The arguments are: .. code-block:: -h, --help show this help message and exit -dir DB_ROOT, -db-root DB_ROOT root directory for all benchmark experiments output -odir OPTIMIZER_ROOT, --opt-root OPTIMIZER_ROOT Directory with optimization wrappers -v, --verbose print the study logs to console -b DB, --db DB database ID of this benchmark experiment The ``DB_ROOT`` must match the folder from the launcher ``bayesmark-launch``, and ``DBID`` must match that displayed from the launcher as well. The aggregate files are found in ``[DB_ROOT]/[DBID]/derived``. The ``bayesmark-anal`` command looks for a ``baseline.json`` file in ``[DB_ROOT]/[DBID]/derived``, which states the best possible and random search performance. If no such file is present, ``bayesmark-anal`` automatically calls ``bayesmark-baseline`` to build it. The baselines are inferred from the random search performance in the logs. The baseline values are considered fixed (not random) quantities when ``bayesmark-anal`` builds confidence intervals. 
Therefore, we allow the user to leave them fixed and do not rebuild them when ``bayesmark-anal`` is called if a baselines file is already present. The result analysis can be done programmatically via :func:`.experiment_analysis.compute_aggregates`, and the baseline computation via :func:`.experiment_baseline.compute_baseline`. See :ref:`how-scoring-works` for more information on how the scores are computed and aggregated. Example ------- After finishing the setup (environment) a small-scale serial can be run as follows: .. code-block:: console > # setup > DB_ROOT=./notebooks # path/to/where/you/put/results > DBID=bo_example_folder > mkdir $DB_ROOT > # experiments > bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT -c SVM DT -v Supply --uuid 3adc3182635e44ea96969d267591f034 to reproduce this run. Supply --dbid bo_example_folder to append to this experiment or reproduce jobs file. User must ensure equal reps of each optimizer for unbiased results -c DT -d boston -o PySOT -u a1b287b450385ad09b2abd7582f404a2 -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder -c DT -d boston -o PySOT -u 63746599ae3f5111a96942d930ba1898 -m mse -n 15 -p 1 -dir /notebooks -b bo_example_folder -c DT -d boston -o RandomSearch -u 8ba16c880ef45b27ba0909199ab7aa8a -m mae -n 15 -p 1 -dir /notebooks -b bo_example_folder ... 0 failures of benchmark script after 144 studies. done > # aggregate > bayesmark-agg -dir $DB_ROOT -b $DBID > # analyze > bayesmark-anal -dir $DB_ROOT -b $DBID -v ... median score @ 15: optimizer PySOT_0.2.3_9b766b6 0.330404 RandomSearch_0.0.1_9b766b6 0.961829 mean score @ 15: optimizer PySOT_0.2.3_9b766b6 0.124262 RandomSearch_0.0.1_9b766b6 0.256422 normed mean score @ 15: optimizer PySOT_0.2.3_9b766b6 0.475775 RandomSearch_0.0.1_9b766b6 0.981787 done The aggregate result files (i.e., ``summary.json``) will now be available in ``$DB_ROOT/$DBID/derived``. 
However, this will be high variance since it was from only 3 trials and only to 15 function evaluations. Plotting and notebooks ---------------------- Plotting the quantitative results found in ``$DB_ROOT/$DBID/derived`` can be done using the notebooks found in the ``notebooks/`` folder of the git repository. The notebook ``plot_mean_score.ipynb`` generates plots for aggregate scores averaging over all problems. The notebook ``plot_test_case.ipynb`` generates plots for each test problem. To use the notebooks, first copy over the ``notebooks/`` folder from the git repository. To set up the kernel for running the notebooks use: .. code-block:: bash virtualenv bobm_ipynb --python=python3.6 source ./bobm_ipynb/bin/activate pip install bayesmark[notebooks] python -m ipykernel install --name=bobm_ipynb --user Now, the notebooks for plotting can be run with the command ``jupyter notebook`` and selecting the kernel ``bobm_ipynb``. It is also possible to convert the notebooks to an HTML report at the command line using ``nbconvert``. For example, use the command: .. code-block:: bash jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb The output file will be in ``./notebooks/plot_mean_score.html``. Here is an example `export `_. See the ``nbconvert`` `documentation page `_ for more output formats. By default, the notebooks look in ``./notebooks/bo_example_folder/`` for the ``summary.json`` from ``bayesmark-anal``. To run ``plot_test_case.ipynb`` use the command: .. code-block:: bash jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=600 The ``--ExecutePreprocessor.timeout=600`` timeout increase is needed due to the large number of plots being generated. The output will be in ``./notebooks/plot_test_case.html``. Adding a new optimizer ====================== All optimizers in this benchmark are required to follow the interface specified by the ``AbstractOptimizer`` class in ``bayesmark.abstract_optimizer``.
In general, this requires creating a wrapper class around the new optimizer. The wrapper classes must all be placed in a folder referred to by the ``--opt-root`` argument. This folder must also contain the ``config.json`` file. The interface is simple: one must merely implement the ``suggest`` and ``observe`` functions. The ``suggest`` function generates new guesses for evaluating the function. Once evaluated, the function evaluations are passed to the ``observe`` function. The objective function is *not* evaluated by the optimizer class. The objective function is evaluated on the outside and results are passed to ``observe``. This is the correct setup for Bayesian optimization because: * We can observe/try inputs that were never suggested * We can ignore suggestions * The objective function may not be something as simple as a Python function So passing the function as an argument as is done in ``scipy.optimize`` is artificially restrictive. The implementation of the wrapper will look like the following: .. code-block:: python from bayesmark.abstract_optimizer import AbstractOptimizer from bayesmark.experiment import experiment_main class NewOptimizerName(AbstractOptimizer): # Used for determining the version number of package used primary_import = "name of import used e.g, opentuner" def __init__(self, api_config, optional_arg_foo=None, optional_arg_bar=None): """Build wrapper class to use optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) # Do whatever other setup is needed # ... def suggest(self, n_suggestions=1): """Get suggestion from the optimizer. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. 
Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ # Do whatever is needed to get the parallel guesses # ... return x_guess def observe(self, X, y): """Feed an observation back. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated """ # Update the model with new objective function observations # ... # No return statement needed if __name__ == "__main__": # This is the entry point for experiments, so pass the class to experiment_main to use this optimizer. # This statement must be included in the wrapper class file: experiment_main(NewOptimizerName) Depending on the API of the optimizer being wrapped, building this wrapper class may only require a few lines of code, or be a total pain. The config file --------------- Note: A config file is now optional. If no ``config.json`` is provided, the experiment launcher will look for all folders with an `optimizer.py` in the ``--opt-root`` directory. Each optimizer wrapper can have multiple configurations, each of which is referred to as a different optimizer in the benchmark. For example, the JSON config file will have entries as follows: .. code-block:: json { "OpenTuner-BanditA-New": [ "opentuner_optimizer.py", {"techniques": ["AUCBanditMetaTechniqueA"]} ], "OpenTuner-GA-DE-New": [ "opentuner_optimizer.py", {"techniques": ["PSO_GA_DE"]} ], "OpenTuner-GA-New": [ "opentuner_optimizer.py", {"techniques": ["PSO_GA_Bandit"]} ] } Basically, the entries are ``"name_of_strategy": ["file_with_class", {kwargs_for_the_constructor}]``. Here, ``OpenTuner-BanditA-New``, ``OpenTuner-GA-DE-New``, and ``OpenTuner-GA-New`` are all treated as different optimizers by the benchmark even though they all use the same class from ``opentuner_optimizer.py``. 
This ``config.json`` must be in the same folder as the optimizer classes (e.g., ``opentuner_optimizer.py``). Running with a new optimizer ---------------------------- To run the benchmarks using a new optimizer, simply provide its name (from ``config.json``) in the ``-o`` list. The ``--opt-root`` argument must be specified in this case. For example, the launch command from the `example <#example>`_ becomes: .. code-block:: bash bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New -c SVM DT --opt-root ./example_opt_root -v Here, we are using the example ``PySOT-New`` wrapper from the ``example_opt_root`` folder in the git repo. It is equivalent to the builtin ``PySOT``, but gives an example of how to provide a new custom optimizer. Contributing ============ The following instructions have been tested with Python 3.6.8 on Ubuntu (16.04.5 LTS). Install in editable mode ------------------------ First, define the variables for the paths we will use: .. code-block:: bash GIT=/path/to/where/you/put/repos ENVS=/path/to/where/you/put/virtualenvs Then clone the repo in your git directory ``$GIT``: .. code-block:: bash cd $GIT git clone https://github.com/uber/bayesmark.git Inside your virtual environments folder ``$ENVS``, make the environment: .. code-block:: bash cd $ENVS virtualenv bayesmark --python=python3.6 source $ENVS/bayesmark/bin/activate Now we can install the pip dependencies. Move back into your git directory and run .. code-block:: bash cd $GIT/bayesmark pip install -r requirements/base.txt pip install -r requirements/optimizers.txt pip install -e . # Install the benchmark itself You may want to run ``pip install -U pip`` first if you have an old version of ``pip``. The file ``optimizers.txt`` contains the dependencies for all the optimizers used in the benchmark. The analysis and aggregation programs can be run using only the requirements in ``base.txt``. Contributor tools ----------------- First, we need to setup some needed tools: .. 
code-block:: bash cd $ENVS virtualenv bayesmark_tools --python=python3.6 source $ENVS/bayesmark_tools/bin/activate pip install -r $GIT/bayesmark/requirements/tools.txt To install the pre-commit hooks for contributing run (in the ``bayesmark_tools`` environment): .. code-block:: bash cd $GIT/bayesmark pre-commit install To rebuild the requirements, we can run: .. code-block:: bash cd $GIT/bayesmark # Get py files from notebooks to analyze jupyter nbconvert --to script notebooks/*.ipynb # Generate the .in files (but pins to latest, which we might not want) pipreqs bayesmark/ --ignore bayesmark/builtin_opt/ --savepath requirements/base.in pipreqs test/ --savepath requirements/test.in pipreqs bayesmark/builtin_opt/ --savepath requirements/optimizers.in pipreqs notebooks/ --savepath requirements/ipynb.in pipreqs docs/ --savepath requirements/docs.in # Regenerate the .txt files from .in files pip-compile-multi --no-upgrade Generating the documentation ---------------------------- First setup the environment for building with ``Sphinx``: .. code-block:: bash cd $ENVS virtualenv bayesmark_docs --python=python3.6 source $ENVS/bayesmark_docs/bin/activate pip install -r $GIT/bayesmark/requirements/docs.txt Then we can do the build: .. code-block:: bash cd $GIT/bayesmark/docs make all open _build/html/index.html Documentation will be available in all formats in ``Makefile``. Use ``make html`` to only generate the HTML documentation. Running the tests ----------------- The tests for this package can be run with: .. code-block:: bash cd $GIT/bayesmark ./test.sh The script creates a conda environment using the requirements found in ``requirements/test.txt``. The ``test.sh`` script *must* be run from a *clean* git repo. Or if we only want to run the unit tests and not check the adequacy of the requirements files, one can use .. 
code-block:: bash # Setup environment cd $ENVS virtualenv bayesmark_test --python=python3.6 source $ENVS/bayesmark_test/bin/activate pip install -r $GIT/bayesmark/requirements/test.txt pip install -e $GIT/bayesmark # Now run tests cd $GIT/bayesmark/ pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html A code coverage report will also be produced in ``$GIT/bayesmark/htmlcov/index.html``. Deployment ---------- The wheel (tar ball) for deployment as a pip installable package can be built using the script: .. code-block:: bash cd $GIT/bayesmark/ ./build_wheel.sh Links ===== The `source `_ is hosted on GitHub. The `documentation `_ is hosted at Read the Docs. Installable from `PyPI `_. The builtin optimizers are wrappers on the following projects: * `HyperOpt `_ * `Nevergrad `_ * `OpenTuner `_ * `PySOT `_ * `Scikit-optimize `_ License ======= This project is licensed under the Apache 2 License - see the LICENSE file for details. ================================================ FILE: bayesmark/__init__.py ================================================ __version__ = "0.0.8" __author__ = "Ryan Turner" __license__ = "Apache v2" ================================================ FILE: bayesmark/abstract_optimizer.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Abstract base class for the optimizers in the benchmark. 
This creates a common API across all packages. """ from abc import ABC, abstractmethod from importlib_metadata import version class AbstractOptimizer(ABC): """Abstract base class for the optimizers in the benchmark. This creates a common API across all packages. """ # Every implementation package needs to specify this static variable, e.g., "primary_import=opentuner" primary_import = None def __init__(self, api_config, **kwargs): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ self.api_config = api_config @classmethod def get_version(cls): """Get the version for this optimizer. Returns ------- version_str : str Version number of the optimizer. Usually, this is equivalent to ``package.__version__``. """ assert (cls.primary_import is None) or isinstance(cls.primary_import, str) # Should use x.x.x as version if sub-class did not specify its primary import version_str = "x.x.x" if cls.primary_import is None else version(cls.primary_import) return version_str @abstractmethod def suggest(self, n_suggestions): """Get a suggestion from the optimizer. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ pass @abstractmethod def observe(self, X, y): """Send an observation of a suggestion back to the optimizer. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. 
y : array-like, shape (n,) Corresponding values where objective has been evaluated """ pass ================================================ FILE: bayesmark/builtin_opt/__init__.py ================================================ ================================================ FILE: bayesmark/builtin_opt/config.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from bayesmark.constants import RANDOM_SEARCH CONFIG = { "HyperOpt": ["hyperopt_optimizer.py", {}], "Nevergrad-OnePlusOne": ["nevergrad_optimizer.py", {"budget": 300, "tool": "OnePlusOne"}], "OpenTuner-BanditA": ["opentuner_optimizer.py", {"techniques": ["AUCBanditMetaTechniqueA"]}], "OpenTuner-GA": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_Bandit"]}], "OpenTuner-GA-DE": ["opentuner_optimizer.py", {"techniques": ["PSO_GA_DE"]}], "PySOT": ["pysot_optimizer.py", {}], "RandomSearch": ["random_optimizer.py", {}], "Scikit-GBRT-Hedge": [ "scikit_optimizer.py", {"acq_func": "gp_hedge", "base_estimator": "GBRT", "n_initial_points": 5}, ], "Scikit-GP-Hedge": ["scikit_optimizer.py", {"acq_func": "gp_hedge", "base_estimator": "GP", "n_initial_points": 5}], "Scikit-GP-LCB": ["scikit_optimizer.py", {"acq_func": "LCB", "base_estimator": "GP", "n_initial_points": 5}], } assert RANDOM_SEARCH in CONFIG, "%s required in settings file." 
% RANDOM_SEARCH ================================================ FILE: bayesmark/builtin_opt/hyperopt_optimizer.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np from hyperopt import hp, tpe from hyperopt.base import JOB_STATE_DONE, JOB_STATE_NEW, STATUS_OK, Domain, Trials from scipy.interpolate import interp1d from bayesmark.abstract_optimizer import AbstractOptimizer from bayesmark.np_util import random as np_random from bayesmark.np_util import random_seed # Sklearn prefers str to unicode: DTYPE_MAP = {"real": float, "int": int, "bool": bool, "cat": str, "ordinal": str} def dummy_f(x): assert False, "This is a placeholder, it should never be called." def only(x): y, = x return y class HyperoptOptimizer(AbstractOptimizer): primary_import = "hyperopt" def __init__(self, api_config, random=np_random): """Build wrapper class to use hyperopt optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. 
""" AbstractOptimizer.__init__(self, api_config) self.random = random space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config) self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None) self.trials = Trials() # Some book keeping like opentuner wrapper self.trial_id_lookup = {} # Store just for data validation self.param_set_chk = frozenset(api_config.keys()) @staticmethod def hashable_dict(d): """A custom function for hashing dictionaries. Parameters ---------- d : dict or dict-like The dictionary to be converted to immutable/hashable type. Returns ------- hashable_object : frozenset of tuple pairs Bijective equivalent to dict that can be hashed. """ hashable_object = frozenset(d.items()) return hashable_object @staticmethod def get_hyperopt_dimensions(api_config): """Help routine to setup hyperopt search space in constructor. Take api_config as argument so this can be static. """ # The ordering of iteration prob makes no difference, but just to be # safe and consistnent with space.py, I will make sorted. 
param_list = sorted(api_config.keys()) space = {} round_to_values = {} for param_name in param_list: param_config = api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_range = param_config.get("range", None) param_values = param_config.get("values", None) # Some setup for case that whitelist of values is provided: values_only_type = param_type in ("cat", "ordinal") if (param_values is not None) and (not values_only_type): assert param_range is None param_values = np.unique(param_values) param_range = (param_values[0], param_values[-1]) round_to_values[param_name] = interp1d( param_values, param_values, kind="nearest", fill_value="extrapolate" ) if param_type == "int": low, high = param_range if param_space in ("log", "logit"): space[param_name] = hp.qloguniform(param_name, np.log(low), np.log(high), 1) else: space[param_name] = hp.quniform(param_name, low, high, 1) elif param_type == "bool": assert param_range is None assert param_values is None space[param_name] = hp.choice(param_name, (False, True)) elif param_type in ("cat", "ordinal"): assert param_range is None space[param_name] = hp.choice(param_name, param_values) elif param_type == "real": low, high = param_range if param_space in ("log", "logit"): space[param_name] = hp.loguniform(param_name, np.log(low), np.log(high)) else: space[param_name] = hp.uniform(param_name, low, high) else: assert False, "type %s not handled in API" % param_type return space, round_to_values def get_trial(self, trial_id): for trial in self.trials._dynamic_trials: if trial["tid"] == trial_id: assert isinstance(trial, dict) # Make sure right kind of dict assert "state" in trial and "result" in trial assert trial["state"] == JOB_STATE_NEW return trial assert False, "No matching trial ID" def cleanup_guess(self, x_guess): assert isinstance(x_guess, dict) # Also, check the keys are only the vars we are searching over: assert frozenset(x_guess.keys()) == self.param_set_chk # Do 
the rounding # Make a copy to be safe, and also unpack singletons # We may also need to consider clip_chk at some point like opentuner x_guess = {k: only(x_guess[k]) for k in x_guess} for param_name, round_f in self.round_to_values.items(): x_guess[param_name] = round_f(x_guess[param_name]) # Also ensure this is correct dtype so sklearn is happy x_guess = {k: DTYPE_MAP[self.api_config[k]["type"]](x_guess[k]) for k in x_guess} return x_guess def _suggest(self): """Helper function to `suggest` that does the work of calling `hyperopt` via its dumb API. """ new_ids = self.trials.new_trial_ids(1) assert len(new_ids) == 1 self.trials.refresh() seed = random_seed(self.random) new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed) assert len(new_trials) == 1 self.trials.insert_trial_docs(new_trials) self.trials.refresh() new_trial, = new_trials # extract singleton return new_trial def suggest(self, n_suggestions=1): """Make `n_suggestions` suggestions for what to evaluate next. This requires the user observe all previous suggestions before calling again. Parameters ---------- n_suggestions : int The number of suggestions to return. Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ assert n_suggestions >= 1, "invalid value for n_suggestions" # Get the new trials, it seems hyperopt either uses random search or # guesses one at a time anyway, so we might as well call serially. 
new_trials = [self._suggest() for _ in range(n_suggestions)] X = [] for trial in new_trials: x_guess = self.cleanup_guess(trial["misc"]["vals"]) X.append(x_guess) # Build lookup to get original trial object x_guess_ = HyperoptOptimizer.hashable_dict(x_guess) assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict" self.trial_id_lookup[x_guess_] = trial["tid"] assert len(X) == n_suggestions return X def observe(self, X, y): """Feed the observations back to hyperopt. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated. """ assert len(X) == len(y) for x_guess, y_ in zip(X, y): x_guess_ = HyperoptOptimizer.hashable_dict(x_guess) assert x_guess_ in self.trial_id_lookup, "Appears to be guess that did not originate from suggest" trial_id = self.trial_id_lookup.pop(x_guess_) trial = self.get_trial(trial_id) assert self.cleanup_guess(trial["misc"]["vals"]) == x_guess, "trial ID not consistent with x values stored" # Cast to float to ensure native type result = {"loss": float(y_), "status": STATUS_OK} trial["state"] = JOB_STATE_DONE trial["result"] = result # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end # of loop of a bunch of evals, so we will do the same thing here. self.trials.refresh() opt_wrapper = HyperoptOptimizer ================================================ FILE: bayesmark/builtin_opt/nevergrad_optimizer.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import nevergrad.optimization as optimization import numpy as np from nevergrad import instrumentation as inst from scipy.stats import norm from bayesmark.abstract_optimizer import AbstractOptimizer from bayesmark.np_util import linear_rescale from bayesmark.space import Real class NevergradOptimizer(AbstractOptimizer): primary_import = "nevergrad" def __init__(self, api_config, tool, budget): """Build wrapper class to use nevergrad optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. tool : str Key into ``nevergrad.optimization.registry`` selecting the optimizer class to use. budget : int Expected number of max function evals """ AbstractOptimizer.__init__(self, api_config) self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config) dimension = self.instrum.dimension opt_class = optimization.registry[tool] self.optim = opt_class(dimension=dimension, budget=budget) @staticmethod def get_nvg_dimensions(api_config): """Help routine to setup nevergrad search space in constructor. Take api_config as argument so this can be static. """ # The ordering of iteration prob makes no difference, but just to be # safe and consistent with space.py, I will make sorted. 
param_list = sorted(api_config.keys()) all_args = {} all_prewarp = {} for param_name in param_list: param_config = api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_range = param_config.get("range", None) param_values = param_config.get("values", None) prewarp = None if param_type == "cat": assert param_space is None assert param_range is None arg = inst.var.SoftmaxCategorical(param_values) elif param_type == "bool": assert param_space is None assert param_range is None assert param_values is None arg = inst.var.OrderedDiscrete([False, True]) elif param_values is not None: assert param_type in ("int", "ordinal", "real") arg = inst.var.OrderedDiscrete(param_values) # We are throwing away information here, but OrderedDiscrete # appears to be invariant to monotonic transformation anyway. elif param_type == "int": assert param_values is None # Need +1 since API is inclusive choices = range(int(param_range[0]), int(param_range[-1]) + 1) arg = inst.var.OrderedDiscrete(choices) # We are throwing away information here, but OrderedDiscrete # appears to be invariant to monotonic transformation anyway. elif param_type == "real": assert param_values is None assert param_range is not None # Will need to warp to this space sep. 
arg = inst.var.Gaussian(mean=0, std=1) prewarp = Real(warp=param_space, range_=param_range) else: assert False, "type %s not handled in API" % param_type all_args[param_name] = arg all_prewarp[param_name] = prewarp instrum = inst.Instrumentation(**all_args) return instrum, all_prewarp def prewarp(self, xx): """Extra work needed to get variables into the Gaussian space representation.""" xxw = {} for arg_name, vv in xx.items(): assert np.isscalar(vv) space = self.space[arg_name] if space is not None: # Warp so we think it is apriori uniform in [a, b] vv = space.warp(vv) assert vv.size == 1 # Now make uniform on [0, 1], also unpack warped to scalar (lb, ub), = space.get_bounds() vv = linear_rescale(vv.item(), lb, ub, 0, 1) # Now make std Gaussian apriori vv = norm.ppf(vv) assert np.isscalar(vv) xxw[arg_name] = vv return xxw def postwarp(self, xxw): """Extra work needed to undo the Gaussian space representation.""" xx = {} for arg_name, vv in xxw.items(): assert np.isscalar(vv) space = self.space[arg_name] if space is not None: # Now make std Gaussian apriori vv = norm.cdf(vv) # Now make uniform on [0, 1] (lb, ub), = space.get_bounds() vv = linear_rescale(vv, 0, 1, lb, ub) # Warp so we think it is apriori uniform in [a, b] vv = space.unwarp([vv]) assert np.isscalar(vv) xx[arg_name] = vv return xx def suggest(self, n_suggestions=1): """Get suggestion from nevergrad. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. 
""" x_guess_data = [self.optim.ask() for _ in range(n_suggestions)] x_guess = [None] * n_suggestions for ii, xx in enumerate(x_guess_data): x_pos, x_kwarg = self.instrum.data_to_arguments(xx) assert x_pos == () x_guess[ii] = self.postwarp(x_kwarg) return x_guess def observe(self, X, y): """Feed an observation back to nevergrad. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated """ for xx, yy in zip(X, y): xx = self.prewarp(xx) xx = self.instrum.arguments_to_data(**xx) self.optim.tell(xx, yy) opt_wrapper = NevergradOptimizer ================================================ FILE: bayesmark/builtin_opt/opentuner_optimizer.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ In opentuner, many search techniques are already available. All the names of the techniques can be found as follows: ``` >>> import opentuner >>> techniques, generators = opentuner.search.technique.all_techniques() >>> for t in techniques: ... print t.name ``` A user can also create new search techniques (http://opentuner.org/tutorial/techniques/). Opentuner will create a multi-arm bandit of multiple techniques if more than one technique is specified in `args.technique`. 
Some bandits with pre-defined techniques are already registered in: `opentuner.search.bandittechniques` By default, we use a pre-defined bandit called `'AUCBanditMetaTechniqueA'` of 4 techniques: ``` register(AUCBanditMetaTechnique([ differentialevolution.DifferentialEvolutionAlt(), evolutionarytechniques.UniformGreedyMutation(), evolutionarytechniques.NormalGreedyMutation(mutation_rate=0.3), simplextechniques.RandomNelderMead()], name='AUCBanditMetaTechniqueA')) ``` The other two bandits used in our experiments are: PSO_GA_DE and PSO_GA_Bandit. Specifying a list of multiple techniques will use a multi-arm bandit over them. """ import warnings from argparse import Namespace import opentuner.tuningrunmain from opentuner.api import TuningRunManager from opentuner.measurement.interface import DefaultMeasurementInterface as DMI from opentuner.resultsdb.models import DesiredResult, Result from opentuner.search.manipulator import ( ConfigurationManipulator, EnumParameter, FloatParameter, IntegerParameter, LogFloatParameter, LogIntegerParameter, ScaledNumericParameter, ) from bayesmark.abstract_optimizer import AbstractOptimizer from bayesmark.np_util import clip_chk DEFAULT_TECHNIQUES = ("AUCBanditMetaTechniqueA",) MEMORY_ONLY_DB = "sqlite://" # Monkey patch here! Opentuner is messed up, TuningRunMain changes global log # settings. We should file an issue report here and have them fix it. opentuner.tuningrunmain.init_logging = lambda: None def ClippedParam(cls, epsilon=1e-5): """Build wrapper class of opentuner parameter class that uses a clip check to keep parameters in the allowed range despite numerical errors. Class built on `ScaledNumericParameter` abstract class defined in: `opentuner.search.manipulator.ScaledNumericParameter`. Parameters ---------- cls : ScaledNumericParameter Opentuner parameter class, such as `LogFloatParameter` or `FloatParameter`, which transforms the domain of parameter. 
Returns ------- StableClass : ScaledNumericParameter New class equivalent to original `cls` but it overrides the original `_unscale` method to enforce a clip check to keep the parameters within their allowed range. """ assert issubclass( cls, ScaledNumericParameter ), "this class cls should inherit from the ScaledNumericParameter class" class StableClass(cls): def _unscale(self, v): unscaled_v = super(StableClass, self)._unscale(v) unscaled_v = clip_chk(unscaled_v, self.min_value, self.max_value) return unscaled_v return StableClass class OpentunerOptimizer(AbstractOptimizer): primary_import = "opentuner" def __init__(self, api_config, techniques=DEFAULT_TECHNIQUES, n_suggestions=1): """Build wrapper class to use opentuner optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. techniques : iterable of strings A list or tuple of techniques to use in opentuner. If the list has only one technique, then that technique will be used. If the list has multiple techniques a bandit over those techniques will be used. n_suggestions : int Default number of suggestions to be made in parallel. """ AbstractOptimizer.__init__(self, api_config) # Opentuner requires DesiredResult to reference suggestion when making # its observation. x_to_dr maps the dict suggestion to DesiredResult. self.x_to_dr = {} # Keep last suggested x and repeat it whenever opentuner gives up. self.dummy_suggest = None """Setting up the arguments for opentuner. 
You can see all possible arguments using: ``` >>> import opentuner >>> opentuner.default_argparser().parse_args(['-h']) ``` We only change a few arguments (other arguments are set to defaults): * database = MEMORY_ONLY_DB: to use an in-memory sqlite database * parallelism = n_suggestions: num of suggestions to give in parallel * technique = techniques: a list of techniques to be used by opentuner * print_params = False: to avoid opentuner from exiting after printing param spaces """ args = Namespace( bail_threshold=500, database=MEMORY_ONLY_DB, display_frequency=10, generate_bandit_technique=False, label=None, list_techniques=False, machine_class=None, no_dups=False, parallel_compile=False, parallelism=n_suggestions, pipelining=0, print_params=False, print_search_space_size=False, quiet=False, results_log=None, results_log_details=None, seed_configuration=[], stop_after=None, technique=techniques, test_limit=5000, ) # Setup some dummy classes required by opentuner to actually run. manipulator = OpentunerOptimizer.build_manipulator(api_config) interface = DMI(args=args, manipulator=manipulator) self.api = TuningRunManager(interface, args) @staticmethod def hashable_dict(d): """A custom function for hashing dictionaries. Parameters ---------- d : dict or dict-like The dictionary to be converted to immutable/hashable type. Returns ------- hashable_object : frozenset of tuple pairs Bijective equivalent to dict that can be hashed. """ hashable_object = frozenset(d.items()) return hashable_object @staticmethod def build_manipulator(api_config): """Build a ConfigurationManipulator object to be used by opentuner. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. Returns ------- manipulator : ConfigurationManipulator Some over complexified class required by opentuner to run. 
""" manipulator = ConfigurationManipulator() for pname in api_config: ptype = api_config[pname]["type"] pspace = api_config[pname].get("space", None) pmin, pmax = api_config[pname].get("range", (None, None)) if ptype == "real": if pspace in ("linear", "logit"): ot_param = FloatParameter(pname, pmin, pmax) elif pspace in ("log", "bilog"): LogFloatParameter_ = ClippedParam(LogFloatParameter) ot_param = LogFloatParameter_(pname, pmin, pmax) else: assert False, "unsupported param space = %s" % pspace elif ptype == "int": if pspace in ("linear", "logit"): ot_param = IntegerParameter(pname, pmin, pmax) elif pspace in ("log", "bilog"): ot_param = LogIntegerParameter(pname, pmin, pmax) else: assert False, "unsupported param space = %s" % pspace elif ptype == "bool": # The actual bool parameter seems not to work in Py3 :( ot_param = IntegerParameter(pname, 0, 1) elif ptype in ("cat", "ordinal"): # Treat ordinal and categorical variables the same for now. assert "values" in api_config[pname] pvalues = api_config[pname]["values"] ot_param = EnumParameter(pname, pvalues) else: assert False, "type=%s/space=%s not handled in opentuner yet" % (ptype, pspace) manipulator.add_parameter(ot_param) return manipulator def suggest(self, n_suggestions=1): """Make `n_suggestions` suggestions for what to evaluate next. This requires the user observe all previous suggestions before calling again. Parameters ---------- n_suggestions : int The number of suggestions to return. Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ assert n_suggestions >= 1, "invalid value for n_suggestions" # Update the n_suggestions if it is different from the current setting. 
if self.api.search_driver.args.parallelism != n_suggestions: self.api.search_driver.args.parallelism = n_suggestions warnings.warn("n_suggestions changed across suggest calls") # Require the user to already observe all previous suggestions. # Otherwise, opentuner will just recycle old suggestions. assert len(self.x_to_dr) == 0, "all the previous suggestions should have been observed by now" # The real meat of suggest from opentuner: Get next `n_suggestions` # unique suggestions. desired_results = [self.api.get_next_desired_result() for _ in range(n_suggestions)] # Save DesiredResult object in dict since observe will need it. X = [] using_dummy_suggest = False for ii in range(n_suggestions): # Opentuner can give up, but the API requires guessing forever. if desired_results[ii] is None: assert self.dummy_suggest is not None, "opentuner gave up on the first call!" # Use the dummy suggestion in this case. X.append(self.dummy_suggest) using_dummy_suggest = True continue # Get the simple dict equivalent to suggestion. x_guess = desired_results[ii].configuration.data X.append(x_guess) # Now save the desired result for future use in observe. x_guess_ = OpentunerOptimizer.hashable_dict(x_guess) assert x_guess_ not in self.x_to_dr, "the suggestions should not already be in the x_to_dr dict" self.x_to_dr[x_guess_] = desired_results[ii] # This will also catch None from opentuner. assert isinstance(self.x_to_dr[x_guess_], DesiredResult) assert len(X) == n_suggestions, "incorrect number of suggestions provided by opentuner" # Log suggestion for repeating if opentuner gives up next time. We can # only do this when it is not already being used since it we will be # checking guesses against dummy_suggest in observe. if not using_dummy_suggest: self.dummy_suggest = X[-1] return X def observe(self, X, y): """Feed the observations back to opentuner. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. 
Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated. """ assert len(X) == len(y) for x_guess, y_ in zip(X, y): x_guess_ = OpentunerOptimizer.hashable_dict(x_guess) # If we can't find the dr object then it must be the dummy guess. if x_guess_ not in self.x_to_dr: assert x_guess == self.dummy_suggest, "Appears to be guess that did not originate from suggest" continue # Get the corresponding DesiredResult object. dr = self.x_to_dr.pop(x_guess_, None) # This will also catch None from opentuner. assert isinstance(dr, DesiredResult), "DesiredResult object not available in x_to_dr" # Opentuner's arg names assume we are minimizing execution time. # So, if we want to minimize we have to pretend y is a 'time'. result = Result(time=y_) self.api.report_result(dr, result) opt_wrapper = OpentunerOptimizer ================================================ FILE: bayesmark/builtin_opt/pysot_optimizer.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import warnings
from copy import copy

import numpy as np
from poap.strategy import EvalRecord
from pySOT.experimental_design import SymmetricLatinHypercube
from pySOT.optimization_problems import OptimizationProblem
from pySOT.strategy import SRBFStrategy
from pySOT.surrogate import CubicKernel, LinearTail, RBFInterpolant

from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.space import JointSpace


class PySOTOptimizer(AbstractOptimizer):
    primary_import = "pysot"

    def __init__(self, api_config):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)

        self.space_x = JointSpace(api_config)
        self.bounds = self.space_x.get_bounds()
        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int
        # batch_size stays None until the first call to suggest, which also starts the pySOT run
        self.batch_size = None
        # history[i] is the unwarped point that belongs to proposals[i]; _observe relies on this pairing
        self.history = []
        self.proposals = []

    def create_opt_prob(self):
        """Create an optimization problem object.

        Stores the problem in `self.opt`. Every dimension is declared continuous and the bounds are taken from
        `self.bounds`, so `self.bounds` must be set before calling this.
        """
        opt = OptimizationProblem()
        opt.lb = self.bounds[:, 0]  # In warped space
        opt.ub = self.bounds[:, 1]  # In warped space
        opt.dim = len(self.bounds)
        opt.cont_var = np.arange(len(self.bounds))
        opt.int_var = []
        assert len(opt.cont_var) + len(opt.int_var) == opt.dim
        opt.objfun = None
        self.opt = opt

    def start(self, max_evals):
        """Starts a new pySOT run.

        Clears `self.history` and `self.proposals` and builds a fresh strategy in `self.strategy`. Requires
        `self.batch_size` to be set already.

        Parameters
        ----------
        max_evals : int
            Accepted but not read by this method; the strategy is built with `self.max_evals`.
        """
        self.history = []
        self.proposals = []

        # Symmetric Latin hypercube design
        des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)])
        slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)

        # Warped RBF interpolant
        rbf = RBFInterpolant(
            dim=self.opt.dim,
            lb=self.opt.lb,
            ub=self.opt.ub,
            kernel=CubicKernel(),
            tail=LinearTail(self.opt.dim),
            eta=1e-4,
        )

        # Optimization strategy
        self.strategy = SRBFStrategy(
            max_evals=self.max_evals,
            opt_prob=self.opt,
            exp_design=slhd,
            surrogate=rbf,
            asynchronous=True,
            batch_size=1,
            use_restarts=True,
        )

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """

        if self.batch_size is None:  # First call to suggest
            self.batch_size = n_suggestions
            self.start(self.max_evals)

        # Set the tolerances pretending like we are running batch
        d, p = float(self.opt.dim), float(n_suggestions)
        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))

        # Now we can make suggestions
        x_w = []
        # NOTE(review): `self.proposals` is reset here but `self.history` is not, while `_observe` uses one index
        # for both lists. This presumably relies on every earlier suggestion having been observed (non-finite
        # observations are skipped in `observe` and leave their history entry behind) -- confirm.
        self.proposals = []
        for _ in range(n_suggestions):
            proposal = self.strategy.propose_action()
            record = EvalRecord(proposal.args, status="pending")
            proposal.record = record
            proposal.accept()  # This triggers all the callbacks

            # It is possible that pySOT proposes a previously evaluated point
            # when all variables are integers, so we just abort in this case
            # since we have likely converged anyway. See PySOT issue #30.
            x = list(proposal.record.params)  # From tuple to list
            x_unwarped, = self.space_x.unwarp(x)
            if x_unwarped in self.history:
                warnings.warn("pySOT proposed the same point twice")
                # Restart the run and produce the whole batch again from the fresh strategy
                self.start(self.max_evals)
                return self.suggest(n_suggestions=n_suggestions)

            # NOTE: Append unwarped to avoid rounding issues
            self.history.append(copy(x_unwarped))
            self.proposals.append(proposal)
            x_w.append(copy(x_unwarped))

        return x_w

    def _observe(self, x, y):
        """Complete the pending proposal whose suggested point equals `x` with the value `y`.

        The matched proposal and its history entry are removed afterwards. If `x` is not in `self.history` the
        index lookup below fails with an ``IndexError``.
        """
        # Find the matching proposal and execute its callbacks
        idx = [x == xx for xx in self.history]
        i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
        proposal = self.proposals[i]
        proposal.record.complete(y)
        self.proposals.pop(i)
        self.history.pop(i)

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        assert len(X) == len(y)

        for x_, y_ in zip(X, y):
            # Just ignore, any inf observations we got, unclear if right thing
            if np.isfinite(y_):
                self._observe(x_, y_)


opt_wrapper = PySOTOptimizer


================================================
FILE: bayesmark/builtin_opt/random_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bayesmark.random_search as rs
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer


class RandomOptimizer(AbstractOptimizer):
    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, random=np_util.random):
        """Build wrapper class to use random search function in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random
            Random source forwarded unchanged to `rs.suggest_dict` as its `random` argument on every call to
            `suggest`. Defaults to `np_util.random`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

    def suggest(self, n_suggestions=1):
        """Get suggestion.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # Empty lists are passed for the first two arguments: no earlier observations are handed over
        x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
        return x_guess

    def observe(self, X, y):
        """Feed an observation back.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Random search so don't do anything
        pass


# All optimizer wrappers need to assign their wrapper to the name opt_wrapper because experiment always tries to import
# opt_wrapper regardless of the optimizer it is importing.
opt_wrapper = RandomOptimizer


================================================
FILE: bayesmark/builtin_opt/scikit_optimizer.py
================================================
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np from scipy.interpolate import interp1d from skopt import Optimizer as SkOpt from skopt.space import Categorical, Integer, Real from bayesmark.abstract_optimizer import AbstractOptimizer class ScikitOptimizer(AbstractOptimizer): primary_import = "scikit-optimize" def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5, **kwargs): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. base_estimator : {'GP', 'RF', 'ET', 'GBRT'} How to estimate the objective function. acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'} Acquisition objective to decide next suggestion. n_initial_points : int Number of points to sample randomly before actual Bayes opt. """ AbstractOptimizer.__init__(self, api_config) dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(api_config) # Older versions of skopt don't copy over the dimensions names during # normalization and hence the names are missing in # self.skopt.space.dimensions. Therefore, we save our own copy of # dimensions list to be safe. If we can commit to using the newer # versions of skopt we can delete self.dimensions. self.dimensions_list = tuple(dd.name for dd in dimensions) # Undecided where we want to pass the kwargs, so for now just make sure # they are blank assert len(kwargs) == 0 self.skopt = SkOpt( dimensions, n_initial_points=n_initial_points, base_estimator=base_estimator, acq_func=acq_func, acq_optimizer="auto", acq_func_kwargs={}, acq_optimizer_kwargs={}, ) @staticmethod def get_sk_dimensions(api_config, transform="normalize"): """Help routine to setup skopt search space in constructor. Take api_config as argument so this can be static. """ # The ordering of iteration prob makes no difference, but just to be # safe and consistnent with space.py, I will make sorted. 
param_list = sorted(api_config.keys()) sk_dims = [] round_to_values = {} for param_name in param_list: param_config = api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_range = param_config.get("range", None) param_values = param_config.get("values", None) # Some setup for case that whitelist of values is provided: values_only_type = param_type in ("cat", "ordinal") if (param_values is not None) and (not values_only_type): assert param_range is None param_values = np.unique(param_values) param_range = (param_values[0], param_values[-1]) round_to_values[param_name] = interp1d( param_values, param_values, kind="nearest", fill_value="extrapolate" ) if param_type == "int": # Integer space in sklearn does not support any warping => Need # to leave the warping as linear in skopt. sk_dims.append(Integer(param_range[0], param_range[-1], transform=transform, name=param_name)) elif param_type == "bool": assert param_range is None assert param_values is None sk_dims.append(Integer(0, 1, transform=transform, name=param_name)) elif param_type in ("cat", "ordinal"): assert param_range is None # Leave x-form to one-hot as per skopt default sk_dims.append(Categorical(param_values, name=param_name)) elif param_type == "real": # Skopt doesn't support all our warpings, so need to pick # closest substitute it does support. prior = "log-uniform" if param_space in ("log", "logit") else "uniform" sk_dims.append(Real(param_range[0], param_range[-1], prior=prior, transform=transform, name=param_name)) else: assert False, "type %s not handled in API" % param_type return sk_dims, round_to_values def suggest(self, n_suggestions=1): """Get a suggestion from the optimizer. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. 
Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ # First get list of lists from skopt.ask() next_guess = self.skopt.ask(n_points=n_suggestions) # Then convert to list of dicts next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess] # Now do the rounding, custom rounding is not supported in skopt. Note # that there is not nec a round function for each dimension here. for param_name, round_f in self.round_to_values.items(): for xx in next_guess: xx[param_name] = round_f(xx[param_name]) return next_guess def observe(self, X, y): """Send an observation of a suggestion back to the optimizer. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated """ # Supposedly skopt can handle blocks, but not sure about interface for # that. Just do loop to be safe for now. for xx, yy in zip(X, y): # skopt needs lists instead of dicts xx = [xx[dim_name] for dim_name in self.dimensions_list] # Just ignore, any inf observations we got, unclear if right thing if np.isfinite(yy): self.skopt.tell(xx, yy) opt_wrapper = ScikitOptimizer ================================================ FILE: bayesmark/cmd_parse.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. """Anything related to parsing command line arguments goes in here. There are some custom data structures to represent all the options available in the experiments here. Not currently any doc strings in this file because it may become obsolete with the use of fire package. """ import argparse import json import os.path import sys import uuid as pyuuid from enum import IntEnum, auto from pathlib import PosixPath import git from git.exc import InvalidGitRepositoryError from pathvalidate.argparse import sanitize_filename, validate_filename, validate_filepath from bayesmark.builtin_opt.config import CONFIG from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES, OPTIMIZERS_FILE, PY_INTERPRETER from bayesmark.path_util import absopen, abspath from bayesmark.util import shell_join assert not any(ARG_DELIM in opt for opt in MODEL_NAMES) assert not any(ARG_DELIM in opt for opt in DATA_LOADER_NAMES) class CmdArgs(IntEnum): uuid = auto() db_root = auto() optimizer_root = auto() data_root = auto() db = auto() optimizer = auto() data = auto() classifier = auto() metric = auto() n_calls = auto() n_suggest = auto() n_repeat = auto() n_jobs = auto() jobs_file = auto() ravel = auto() verbose = auto() dry_run = auto() rev = auto() opt_rev = auto() timeout = auto() CMD_STR = { CmdArgs.uuid: ("-u", "--uuid"), CmdArgs.db_root: ("-dir", "-db-root"), CmdArgs.optimizer_root: ("-odir", "--opt-root"), CmdArgs.data_root: ("-dr", "--data-root"), CmdArgs.db: ("-b", "--db"), CmdArgs.optimizer: ("-o", "--opt"), CmdArgs.data: ("-d", "--data"), CmdArgs.classifier: ("-c", "--classifier"), CmdArgs.metric: ("-m", "--metric"), CmdArgs.n_calls: ("-n", "--calls"), CmdArgs.n_suggest: ("-p", "--suggestions"), CmdArgs.n_repeat: ("-r", "--repeat"), CmdArgs.n_jobs: ("-nj", "--num-jobs"), CmdArgs.jobs_file: ("-ofile", "--jobs-file"), CmdArgs.ravel: ("-rv", "--ravel"), 
CmdArgs.verbose: ("-v", "--verbose"), CmdArgs.timeout: ("-t", "--timeout"), CmdArgs.dry_run: (None, "dry_run"), # Will not be specified from CLI CmdArgs.rev: (None, "rev"), # Will not be specified from CLI CmdArgs.opt_rev: (None, "opt_rev"), # Will not be specified from CLI. Which version of optimizer. } def arg_to_str(arg): # We can change this so it is arg.value, or someway to be usable by field interface _, dest = str(arg).split(".") return dest def namespace_to_dict(args_ns): args = vars(args_ns) args = {kk: args[arg_to_str(kk)] for kk in CMD_STR if (arg_to_str(kk) in args)} return args def serializable_dict(args): args_str = {CMD_STR[kk][1]: args[kk] for kk in CMD_STR if (kk in args)} assert len(args_str) == len(args) return args_str def unserializable_dict(args_str): args = {kk: args_str[CMD_STR[kk][1]] for kk in CMD_STR if (CMD_STR[kk][1] in args_str)} assert len(args_str) == len(args) return args def add_argument(parser, arg, **kwargs): short_name, long_name = CMD_STR[arg] dest = arg_to_str(arg) parser.add_argument(short_name, long_name, dest=dest, **kwargs) def filepath(value): """Work around for `pathvalidate` bug.""" if value == ".": return value validate_filepath(value, platform="auto") return value def filename(value): validate_filename(value, platform="universal") return value def uuid(val_str): val = str.lower(val_str) uuid_ = pyuuid.UUID(hex=val) assert val == uuid_.hex, "error in parsing uuid" return val def positive_int(val_str): val = int(val_str) if val <= 0: msg = "expected positive, got %s" % val_str raise argparse.ArgumentTypeError(msg) return val def joinable(val_str): val = str(val_str) # just for good measure validate_filename(val, platform="universal") # we choose to be at least as strict as filenames if ARG_DELIM in val: msg = "delimiter %s not allowed in choice %s" % (ARG_DELIM, val) raise argparse.ArgumentTypeError(msg) return val def load_rev_number(): # This function uses a lot of language "power features" that could be considered 
bad form: # 1) does a conditional import to get version # 2) uses __file__ to try and extract and git repo version during execution # We will let this fly anyway because: # 1) The results of this are only used for logging anyway # 2) This is a command parsing module of the code and inherently very non-pure and doing IO etc # 3) Unclear if there is a cleaner way to do this # Get rev from version file (if running inside the pip-installable wheel without the git repo) try: from bayesmark import version rev_file = version.VERSION except ImportError: rev_file = None else: rev_file = rev_file.strip() rev = rev_file # Get rev from git API if inside git repo (and not built wheel from pip install ...) wdir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) try: repo = git.Repo(path=wdir, search_parent_directories=False) except InvalidGitRepositoryError: rev_repo = None else: rev_repo = repo.head.commit.hexsha rev_repo = rev_repo.strip() rev = rev_repo # Check coherence of what we found if (rev_repo is None) and (rev_file is None): raise RuntimeError("Must specify version.py if not inside a git repo.") if (rev_repo is not None) and (rev_file is not None): assert rev_repo == rev_file, "Rev file %s does not match rev git %s" % (rev_file, rev_repo) assert rev == rev.strip() # We could first enforce is_lower_hex if we want to enforce that rev = rev[:7] return rev def base_parser(): parser = argparse.ArgumentParser(add_help=False) add_argument( parser, CmdArgs.db_root, default=".", type=filepath, help="root directory for all benchmark experiments output" ) add_argument( parser, CmdArgs.optimizer_root, default=".", type=filepath, help="Directory with optimization wrappers" ) # Always a verbose flag option add_argument(parser, CmdArgs.verbose, action="store_true", help="print the study logs to console") return parser def launcher_parser(description): parser = argparse.ArgumentParser(description=description, parents=[base_parser()]) add_argument(parser, CmdArgs.uuid, 
type=uuid, help="length 32 hex UUID for this experiment") add_argument(parser, CmdArgs.data_root, type=filepath, help="root directory for all custom csv files") add_argument(parser, CmdArgs.db, type=filename, help="database ID of this benchmark experiment") add_argument(parser, CmdArgs.optimizer, type=joinable, nargs="+", help="optimizers to use") add_argument(parser, CmdArgs.data, type=joinable, nargs="+", help="data sets to use") add_argument(parser, CmdArgs.classifier, type=joinable, nargs="+", help="classifiers to use") add_argument(parser, CmdArgs.metric, type=str, choices=METRICS, nargs="+", help="scoring metric to use") # Iterations counts used in experiments add_argument(parser, CmdArgs.n_calls, default=100, type=positive_int, help="number of function evaluations") add_argument( parser, CmdArgs.n_suggest, default=1, type=positive_int, help="number of suggestions to provide in parallel" ) add_argument(parser, CmdArgs.n_repeat, default=20, type=positive_int, help="number of repetitions of each study") add_argument(parser, CmdArgs.timeout, default=0, type=int, help="Timeout per experiment (0 = no timeout)") # Arguments for creating dry run jobs file add_argument( parser, CmdArgs.n_jobs, type=int, default=0, help="number of jobs to put in the dry run file, the default 0 value disables dry run (real run)", ) # Using default of current dir for jobs file output since that is generally the default for everything add_argument( parser, CmdArgs.jobs_file, type=filepath, default="./jobs.txt", help="a jobs file with all commands to be run" ) return parser def experiment_parser(description): parser = argparse.ArgumentParser(description=description, parents=[base_parser()]) add_argument(parser, CmdArgs.uuid, type=uuid, required=True, help="length 32 hex UUID for this experiment") # This could be made simpler and use '.' default for dataroot, even if no custom data used. 
add_argument(parser, CmdArgs.data_root, type=filepath, help="root directory for all custom csv files") add_argument(parser, CmdArgs.db, type=filename, required=True, help="database ID of this benchmark experiment") add_argument(parser, CmdArgs.optimizer, required=True, type=joinable, help="optimizer to use") add_argument(parser, CmdArgs.data, required=True, type=joinable, help="data set to use") add_argument(parser, CmdArgs.classifier, required=True, type=joinable, help="classifier to use") add_argument(parser, CmdArgs.metric, required=True, type=str, choices=METRICS, help="scoring metric to use") add_argument(parser, CmdArgs.n_calls, default=100, type=positive_int, help="number of function evaluations") add_argument( parser, CmdArgs.n_suggest, default=1, type=positive_int, help="number of suggestions to provide in parallel" ) return parser def agg_parser(description): parser = argparse.ArgumentParser(description=description, parents=[base_parser()]) add_argument(parser, CmdArgs.db, type=filename, required=True, help="database ID of this benchmark experiment") add_argument( parser, CmdArgs.ravel, action="store_true", help="ravel all studies to store batch suggestions as if they were serial (deprecated)", ) return parser def general_parser(description): parser = argparse.ArgumentParser(description=description, parents=[base_parser()]) add_argument(parser, CmdArgs.db, type=filename, required=True, help="database ID of this benchmark experiment") return parser def parse_args(parser, argv=None): """Note that this argument parser does not check compatibility between clf/reg metric and data set. 
""" args = parser.parse_args(argv) args = namespace_to_dict(args) args[CmdArgs.dry_run] = (CmdArgs.n_jobs in args) and (args[CmdArgs.n_jobs] > 0) # Does not check dir actually exists here, but whatever args[CmdArgs.jobs_file] = abspath(args[CmdArgs.jobs_file], verify=False) if args[CmdArgs.dry_run] else None # Then make sure all path vars are abspath: # Dry run might be executing on diff system => cannot verify yet args[CmdArgs.db_root] = abspath(args[CmdArgs.db_root], verify=not args[CmdArgs.dry_run]) args[CmdArgs.optimizer_root] = abspath(args[CmdArgs.optimizer_root], verify=True) if (CmdArgs.data_root in args) and (args[CmdArgs.data_root] is not None): args[CmdArgs.data_root] = abspath(args[CmdArgs.data_root], verify=not args[CmdArgs.dry_run]) # Get git version of the benchmark itself for meta-data, just in case we need it. args[CmdArgs.rev] = load_rev_number() # We may support ability to specify version at args in the future, from now it is implied args[CmdArgs.opt_rev] = None return args def _cleanup(filename_str): filename_str = sanitize_filename(filename_str, replacement_text="-", platform="universal") filename_str = filename_str.replace(ARG_DELIM, "-") return filename_str def infer_settings(opt_root, opt_pattern="**/optimizer.py"): opt_root = PosixPath(opt_root) assert opt_root.is_dir(), "Opt root directory doesn't exist: %s" % opt_root assert opt_root.is_absolute(), "Only absolute path should have even gotten this far." # Always sort for reproducibility source_files = sorted(opt_root.glob(opt_pattern)) source_files = [ss.relative_to(opt_root) for ss in source_files] settings = {_cleanup(str(ss.parent)): [str(ss), {}] for ss in source_files} assert all(joinable(kk) for kk in settings), "Something went wrong in name sanitization." assert len(settings) == len(source_files), "Name collision after sanitization of %s" % repr(source_files) assert len(set(CONFIG.keys()) & set(settings.keys())) == 0, "Name collision with builtin optimizers." 
return settings def load_optimizer_settings(opt_root): try: with absopen(os.path.join(opt_root, OPTIMIZERS_FILE), "r") as f: settings = json.load(f) except FileNotFoundError: # Search for optimizers instead settings = infer_settings(opt_root) assert isinstance(settings, dict) assert not any((ARG_DELIM in opt) for opt in settings), "optimizer names violates name convention" return settings def cmd_str(): cmd = "%s %s" % (PY_INTERPRETER, shell_join(sys.argv)) return cmd ================================================ FILE: bayesmark/constants.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """General constants that are used in multiple files in the code base. """ # Special constant for random search since it gets used as our reference point in the baselines RANDOM_SEARCH = "RandomSearch" OPTIMIZERS_FILE = "config.json" ARG_DELIM = "_" # Delimeter used when concat cmd argument for any reason PY_INTERPRETER = "python" # What command to call for sub process, we could specify version number here also. 
# Variables to save in SAL EVAL = "eval" TIME = "time" SUGGEST_LOG = "suggest_log" EXP_VARS = (EVAL, TIME, SUGGEST_LOG) # Derived variables to save in SAL TIME_RESULTS = "time" EVAL_RESULTS = "eval" BASELINE = "baseline" PERF_RESULTS = "perf" MEAN_SCORE = "summary" # Coordinate dim names needed in saved xr Datasets ITER = "iter" TEST_CASE = "function" METHOD = "optimizer" TRIAL = "study_id" SUGGEST = "suggestion" OBJECTIVE = "objective" # Dataset variables for eval results VISIBLE_TO_OPT = "_visible_to_opt" # Dataset variables for time results SUGGEST_PHASE = "suggest" OBS_PHASE = "observe" EVAL_PHASE = "eval" EVAL_PHASE_SUM = "eval_sum" EVAL_PHASE_MAX = "eval_max" # Dataset variables for aggregate results PERF_MED = "median" LB_MED = "median LB" UB_MED = "median UB" NORMED_MED = "median normed" PERF_MEAN = "mean" LB_MEAN = "mean LB" UB_MEAN = "mean UB" NORMED_MEAN = "mean normed" LB_NORMED_MEAN = "mean normed LB" UB_NORMED_MEAN = "mean normed UB" PERF_BEST = "best" PERF_CLIP = "clip" # Choices used for test problems, there is some redundant specification with sklearn funcs file here MODEL_NAMES = ("DT", "MLP-adam", "MLP-sgd", "RF", "SVM", "ada", "kNN", "lasso", "linear") DATA_LOADER_NAMES = ("breast", "digits", "iris", "wine", "boston", "diabetes") SCORERS_CLF = ("nll", "acc") SCORERS_REG = ("mae", "mse") METRICS = tuple(sorted(SCORERS_CLF + SCORERS_REG)) ================================================ FILE: bayesmark/data.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. """Module to deal with all matters relating to loading example data sets, which we tune ML models to. """ from enum import IntEnum, auto import numpy as np import pandas as pd # only needed for csv reader, maybe try something else from sklearn import datasets from bayesmark.constants import DATA_LOADER_NAMES, SCORERS_CLF, SCORERS_REG from bayesmark.path_util import join_safe_r from bayesmark.stats import robust_standardize class ProblemType(IntEnum): """The different problem types we consider. Currently, just regression (`reg`) and classification (`clf`). """ clf = auto() reg = auto() DATA_LOADERS = { "digits": (datasets.load_digits, ProblemType.clf), "iris": (datasets.load_iris, ProblemType.clf), "wine": (datasets.load_wine, ProblemType.clf), "breast": (datasets.load_breast_cancer, ProblemType.clf), "boston": (datasets.load_boston, ProblemType.reg), "diabetes": (datasets.load_diabetes, ProblemType.reg), } assert sorted(DATA_LOADERS.keys()) == sorted(DATA_LOADER_NAMES) # Arguably, this could go in constants, but doesn't cause extra imports being here. METRICS_LOOKUP = {ProblemType.clf: SCORERS_CLF, ProblemType.reg: SCORERS_REG} def get_problem_type(dataset_name): """Determine if this dataset is a regression of classification problem. Parameters ---------- dataset : str Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file. Returns ------- problem_type : ProblemType `Enum` to indicate if regression of classification data set. """ if dataset_name in DATA_LOADERS: _, problem_type = DATA_LOADERS[dataset_name] return problem_type # Maybe we can come up with a better system, but for now let's use a convention based on the naming of the csv file. if dataset_name.startswith("reg-"): return ProblemType.reg if dataset_name.startswith("clf-"): return ProblemType.clf assert False, "Can't determine problem type from dataset name." 
def _csv_loader(dataset_name, return_X_y, data_root, clip_x=100): # pragma: io """Load custom csv files for use in the benchmark. This function assumes ``dataset_name + ".csv"`` is a csv file found in the `data_root` path. It also assumes the last column of the csv file is the target and the other columns are features. The target column should be `int` for classification and `float` for regression. Column names ending in ``"_cat"`` are assumed to be categorical and will be one-hot encoded. The features (and target for regression) are robust standardized. The features are also clipped to be in ``[-clip_x, clip_x]`` *after* standardization. """ assert return_X_y, "Only returning (X,y) tuple supported right now." assert clip_x >= 0 # Quantile range for robust standardization. The 86% range is the most efficient for Gaussians. See: # https://github.com/scikit-learn/scikit-learn/issues/10139#issuecomment-344705040 q_level = 0.86 path = join_safe_r(data_root, dataset_name + ".csv") # For now, use convention that can get problem type based on data set name problem_type = get_problem_type(dataset_name) # Assuming no missing data in source csv files at the moment, these will # result in error. 
df = pd.read_csv( path, header=0, index_col=False, engine="c", na_filter=False, true_values=["true"], false_values=["false"] ) label = df.columns[-1] # Assume last col is target target = df.pop(label).values if problem_type == ProblemType.clf: assert target.dtype in (np.bool_, np.int_) target = target.astype(np.int_) # convert to int for skl if problem_type == ProblemType.reg: assert target.dtype == np.float_ # 86% range is the most efficient (at least for Gaussians) target = robust_standardize(target, q_level=q_level) # Fill in an categorical variables (object dtype of cols names ..._cat) cat_cols = sorted(cc for cc in df.columns if cc.endswith("_cat") or df[cc].dtype.kind == "O") df = pd.get_dummies(df, columns=cat_cols, drop_first=True, dtype=np.float_) # Could also sort all columns to be sure it will be reprod # Everything should now be in float assert (df.dtypes == np.float_).all() data = df.values data = robust_standardize(data, q_level=q_level) # Debatable if we should include this, but there are a lot of outliers data = np.clip(data, -clip_x, clip_x) # We should probably do some logging or something to wrap up return data, target, problem_type def load_data(dataset_name, data_root=None): # pragma: io """Load a data set and return it in, pre-processed into numpy arrays. Parameters ---------- dataset : str Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file. data_root : str Root directory to look for all custom csv files. May be ``None`` for sklearn data sets. Returns ------- data : :class:`numpy:numpy.ndarray` of shape (n, d) The feature matrix of the data set. It will be `float` array. target : :class:`numpy:numpy.ndarray` of shape (n,) The target vector for the problem, which is `int` for classification and `float` for regression. problem_type : :class:`bayesmark.data.ProblemType` `Enum` to indicate if regression of classification data set. 
""" if dataset_name in DATA_LOADERS: loader_f, problem_type = DATA_LOADERS[dataset_name] data, target = loader_f(return_X_y=True) else: # try to load as custom csv assert data_root is not None, "data root cannot be None when custom csv requested." data, target, problem_type = _csv_loader(dataset_name, return_X_y=True, data_root=data_root) return data, target, problem_type ================================================ FILE: bayesmark/expected_max.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Compute expected maximum or minimum from iid samples. """ import numpy as np from scipy.special import gammaln, logsumexp def get_expected_max_weights(n, m): """Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set. Parameters ---------- n : int Number of data points in data set ``len(x)``. Must be ``>= 1``. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. Returns ------- pdf : :class:`numpy:numpy.ndarray`, shape (n,) The weights for L-estimator. Will be positive and sum to one. 
""" assert np.ndim(n) == 0 assert n >= 1 # otherwise makes no sense m = np.asarray(m) # Must be np type for broadcasting # We could also check dtype is int, but not bothering here assert np.all(m >= 1) # otherwise makes no sense m = m[..., None] kk = 1 + np.arange(n) lpdf = gammaln(kk) - gammaln(kk - (m - 1)) pdf = np.exp(lpdf - logsumexp(lpdf, axis=-1, keepdims=True)) # expect nan for m > n assert np.all((m > n) | np.isclose(np.sum(pdf, axis=-1, keepdims=True), 1.0)) return pdf def expected_max(x, m): """Compute unbiased estimator of expected ``max(x[1:m])`` on a data set. Parameters ---------- x : :class:`numpy:numpy.ndarray` of shape (n,) Data set we would like expected ``max(x[1:m])`` on. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. Returns ------- E_max_x : float Unbiased estimate of mean max of `m` draws from distribution on `x`. """ assert np.ndim(x) == 1 # m is validated by get_expected_max_weights # Get order stats for L-estimator x = np.array(x, copy=True) # we will modify in place x.sort() # in place!! # Now get estimator weights n, = x.shape if n == 0: return np.full(np.shape(m), np.nan) pdf = get_expected_max_weights(n, m) # Compute L-estimator E_max_x = np.sum(x * pdf, axis=-1) return E_max_x def expected_min(x, m): """Compute unbiased estimator of expected ``min(x[1:m])`` on a data set. Parameters ---------- x : :class:`numpy:numpy.ndarray` of shape (n,) Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected minimum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. 
Returns ------- E_min_x : float Unbiased estimate of mean min of `m` draws from distribution on `x`. """ x = np.asarray(x) E_min_x = -expected_max(-x, m) return E_min_x ================================================ FILE: bayesmark/experiment.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Perform a study. """ import json import logging import random as pyrandom import uuid import warnings from collections import OrderedDict from time import sleep, time import numpy as np import xarray as xr import bayesmark.cmd_parse as cmd import bayesmark.constants as cc import bayesmark.random_search as rs from bayesmark.builtin_opt.config import CONFIG from bayesmark.cmd_parse import CmdArgs from bayesmark.constants import ARG_DELIM, ITER, OBJECTIVE, SUGGEST from bayesmark.data import METRICS_LOOKUP, get_problem_type from bayesmark.np_util import argmin_2d, linear_rescale, random_seed from bayesmark.serialize import XRSerializer from bayesmark.signatures import analyze_signature_pair, get_func_signature from bayesmark.sklearn_funcs import SklearnModel, SklearnSurrogate from bayesmark.space import JointSpace from bayesmark.util import chomp, str_join_safe logger = logging.getLogger(__name__) # For now treat the objective names as global const. However, in the future these could vary by type of problem. 
OBJECTIVE_NAMES = SklearnModel.objective_names def _build_test_problem(model_name, dataset, scorer, path): """Build the class with the class to use an objective. Sort of a factory. Parameters ---------- model_name : str Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`. dataset : str Which data set the model is being tuned to, which must be either a) an element of `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set. scorer : str Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for classification models, or `sklearn_funcs.SCORERS_REG` for regression models. path : str or None Absolute path to folder containing custom data sets/pickle files with surrogate model. Returns ------- prob : :class:`.sklearn_funcs.TestFunction` The test function to evaluate in experiments. """ if model_name.endswith("-surr"): # Requires IO to test these, so will add the pargma here. Maybe that points towards a possible design change. model_name = chomp(model_name, "-surr") # pragma: io prob = SklearnSurrogate(model_name, dataset, scorer, path=path) # pragma: io else: prob = SklearnModel(model_name, dataset, scorer, data_root=path) return prob def run_study(optimizer, test_problem, n_calls, n_suggestions, n_obj=1, callback=None): """Run a study for a single optimizer on a single test problem. This function can be used for benchmarking on general stateless objectives (not just `sklearn`). Parameters ---------- optimizer : :class:`.abstract_optimizer.AbstractOptimizer` Instance of one of the wrapper optimizers. test_problem : :class:`.sklearn_funcs.TestFunction` Instance of test function to attempt to minimize. n_calls : int How many iterations of minimization to run. n_suggestions : int How many parallel evaluation we run each iteration. Must be ``>= 1``. 
n_obj : int Number of different objectives measured, only objective 0 is seen by optimizer. Must be ``>= 1``. callback : callable Optional callback taking the current best function evaluation, and the number of iterations finished. Takes array of shape `(n_obj,)`. Returns ------- function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj) Value of objective for each evaluation. timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`) Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``, ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each evaluation of the objective function, and the time to make an observe call. suggest_log : list(list(dict(str, object))) Log of the suggestions corresponding to the `function_evals`. """ assert n_suggestions >= 1, "batch size must be at least 1" assert n_obj >= 1, "Must be at least one objective" space_for_validate = JointSpace(test_problem.get_api_config()) if callback is not None: # First do initial log at inf score, in case we don't even get to first eval before crash/job timeout callback(np.full((n_obj,), np.inf, dtype=float), 0) suggest_time = np.zeros(n_calls) observe_time = np.zeros(n_calls) eval_time = np.zeros((n_calls, n_suggestions)) function_evals = np.zeros((n_calls, n_suggestions, n_obj)) suggest_log = [None] * n_calls for ii in range(n_calls): tt = time() try: next_points = optimizer.suggest(n_suggestions) except Exception as e: logger.warning("Failure in optimizer suggest. 
Falling back to random search.") logger.exception(e, exc_info=True) print(json.dumps({"optimizer_suggest_exception": {ITER: ii}})) api_config = test_problem.get_api_config() next_points = rs.suggest_dict([], [], api_config, n_suggestions=n_suggestions) suggest_time[ii] = time() - tt logger.info("suggestion time taken %f iter %d next_points %s" % (suggest_time[ii], ii, str(next_points))) assert len(next_points) == n_suggestions, "invalid number of suggestions provided by the optimizer" # We could put this inside the TestProblem class, but ok here for now. try: space_for_validate.validate(next_points) # Fails if suggestions outside allowed range except Exception: raise ValueError("Optimizer suggestion is out of range.") for jj, next_point in enumerate(next_points): tt = time() try: f_current_eval = test_problem.evaluate(next_point) except Exception as e: logger.warning("Failure in function eval. Setting to inf.") logger.exception(e, exc_info=True) f_current_eval = np.full((n_obj,), np.inf, dtype=float) eval_time[ii, jj] = time() - tt assert np.shape(f_current_eval) == (n_obj,) suggest_log[ii] = next_points function_evals[ii, jj, :] = f_current_eval logger.info( "function_evaluation time %f value %f suggestion %s" % (eval_time[ii, jj], f_current_eval[0], str(next_point)) ) # Note: this could be inf in the event of a crash in f evaluation, the optimizer must be able to handle that. # Only objective 0 is seen by optimizer. eval_list = function_evals[ii, :, 0].tolist() if callback is not None: idx_ii, idx_jj = argmin_2d(function_evals[: ii + 1, :, 0]) callback(function_evals[idx_ii, idx_jj, :], ii + 1) tt = time() try: optimizer.observe(next_points, eval_list) except Exception as e: logger.warning("Failure in optimizer observe. 
Ignoring these observations.") logger.exception(e, exc_info=True) print(json.dumps({"optimizer_observe_exception": {ITER: ii}})) observe_time[ii] = time() - tt logger.info( "observation time %f, current best %f at iter %d" % (observe_time[ii], np.min(function_evals[: ii + 1, :, 0]), ii) ) return function_evals, (suggest_time, eval_time, observe_time), suggest_log def run_sklearn_study( opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions, data_root=None, callback=None ): """Run a study for a single optimizer on a single `sklearn` model/data set combination. This routine is meant for benchmarking when tuning `sklearn` models, as opposed to the more general :func:`.run_study`. Parameters ---------- opt_class : :class:`.abstract_optimizer.AbstractOptimizer` Type of wrapper optimizer must be subclass of :class:`.abstract_optimizer.AbstractOptimizer`. opt_kwargs : kwargs `kwargs` to use when instantiating the wrapper class. model_name : str Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`. dataset : str Which data set the model is being tuned to, which must be either a) an element of `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set. scorer : str Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for classification models, or `sklearn_funcs.SCORERS_REG` for regression models. n_calls : int How many iterations of minimization to run. n_suggestions : int How many parallel evaluation we run each iteration. Must be ``>= 1``. data_root : str Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.`` callback : callable Optional callback taking the current best function evaluation, and the number of iterations finished. Takes array of shape `(n_obj,)`. 
Returns ------- function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj) Value of objective for each evaluation. timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`) Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``, ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each evaluation of the objective function, and the time to make an observe call. suggest_log : list(list(dict(str, object))) Log of the suggestions corresponding to the `function_evals`. """ # Setup test function function_instance = _build_test_problem(model_name, dataset, scorer, data_root) # Setup optimizer api_config = function_instance.get_api_config() optimizer_instance = opt_class(api_config, **opt_kwargs) assert function_instance.objective_names == OBJECTIVE_NAMES assert OBJECTIVE_NAMES[0] == cc.VISIBLE_TO_OPT n_obj = len(OBJECTIVE_NAMES) # Now actually do the experiment function_evals, timing, suggest_log = run_study( optimizer_instance, function_instance, n_calls, n_suggestions, n_obj=n_obj, callback=callback ) return function_evals, timing, suggest_log def get_objective_signature(model_name, dataset, scorer, data_root=None): """Get signature of an objective function specified by an sklearn model and dataset. This routine specializes :func:`.signatures.get_func_signature` for the `sklearn` study case. Parameters ---------- model_name : str Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`. dataset : str Which data set the model is being tuned to, which must be either a) an element of `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set. scorer : str Which metric to use when evaluating the model. 
This must be an element of `sklearn_funcs.SCORERS_CLF` for classification models, or `sklearn_funcs.SCORERS_REG` for regression models. data_root : str Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.`` Returns ------- signature : list(str) The signature of this test function. """ function_instance = _build_test_problem(model_name, dataset, scorer, data_root) api_config = function_instance.get_api_config() signature = get_func_signature(function_instance.evaluate, api_config) return signature def build_eval_ds(function_evals, objective_names): """Convert :class:`numpy:numpy.ndarray` with function evaluations to :class:`xarray:xarray.Dataset`. This function is a data cleanup routine after running an experiment, before serializing the data to end the study. Parameters ---------- function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj) Value of objective for each evaluation. objective_names : list(str) of shape (n_obj,) The names of each objective. Returns ------- eval_ds : :class:`xarray:xarray.Dataset` :class:`xarray:xarray.Dataset` containing one variable for each objective with the objective function evaluations. It has dimensions ``(ITER, SUGGEST)``. """ n_call, n_suggest, n_obj = np.shape(function_evals) assert len(objective_names) == n_obj assert len(set(objective_names)) == n_obj, "Objective names must be unique" coords = {ITER: range(n_call), SUGGEST: range(n_suggest), OBJECTIVE: list(objective_names)} dims = (ITER, SUGGEST, OBJECTIVE) da = xr.DataArray(data=function_evals, coords=coords, dims=dims) eval_ds = da.to_dataset(dim=OBJECTIVE) return eval_ds def build_timing_ds(suggest_time, eval_time, observe_time): """Convert :class:`numpy:numpy.ndarray` with timing evaluations to :class:`xarray:xarray.Dataset`. This function is a data cleanup routine after running an experiment, before serializing the data to end the study. 
Parameters ---------- suggest_time : :class:`numpy:numpy.ndarray` of shape (n_calls,) The time to make each (batch) suggestion. eval_time : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions) The time for each evaluation of the objective function. observe_time : :class:`numpy:numpy.ndarray` of shape (n_calls,) The time for each (batch) evaluation of the objective function, and the time to make an observe call. Returns ------- time_ds : :class:`xarray:xarray.Dataset` Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``, ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. The variable `EVAL_PHASE` has the function evaluation time for each parallel suggestion. """ n_call, n_suggest = np.shape(eval_time) assert np.shape(suggest_time) == (n_call,) assert np.shape(observe_time) == (n_call,) coords = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))]) data = OrderedDict() data[cc.SUGGEST_PHASE] = ((ITER,), suggest_time) data[cc.EVAL_PHASE] = ((ITER, SUGGEST), eval_time) data[cc.OBS_PHASE] = ((ITER,), observe_time) time_ds = xr.Dataset(data, coords=coords) return time_ds def build_suggest_ds(suggest_log): """Convert :class:`numpy:numpy.ndarray` with function evaluation inputs to :class:`xarray:xarray.Dataset`. This function is a data cleanup routine after running an experiment, before serializing the data to end the study. Parameters ---------- suggest_log : list(list(dict(str, object))) Log of the suggestions. It has shape `(n_call, n_suggest)`. Returns ------- suggest_ds : :class:`xarray:xarray.Dataset` :class:`xarray:xarray.Dataset` containing one variable for each input with the objective function evaluations. It has dimensions ``(ITER, SUGGEST)``. 
""" n_call, n_suggest = np.shape(suggest_log) assert n_call * n_suggest > 0 # Setup the dims ds_vars = sorted(suggest_log[0][0].keys()) coords = OrderedDict([(ITER, range(n_call)), (SUGGEST, range(n_suggest))]) # There is prob a way to vectorize this more but good enough for now. Using np.full to infer dtype from 1st element data = OrderedDict([(kk, ((ITER, SUGGEST), np.full((n_call, n_suggest), suggest_log[0][0][kk]))) for kk in ds_vars]) for ii in range(n_call): for jj in range(n_suggest): for kk in ds_vars: data[kk][1][ii, jj] = suggest_log[ii][jj][kk] suggest_ds = xr.Dataset(data, coords=coords) return suggest_ds def load_optimizer_kwargs(optimizer_name, opt_root): # pragma: io """Load the kwarg options for this optimizer being tested. This is part of the general experiment setup before a study. Parameters ---------- optimizer_name : str Name of the optimizer being tested. This optimizer name must be present in optimizer config file. opt_root : str Absolute path to folder containing the config file. Returns ------- kwargs : dict(str, object) The kwargs setting to pass into the optimizer wrapper constructor. """ if optimizer_name in CONFIG: _, kwargs = CONFIG[optimizer_name] else: settings = cmd.load_optimizer_settings(opt_root) assert optimizer_name in settings, "optimizer %s not found in settings file %s" % optimizer_name _, kwargs = settings[optimizer_name] return kwargs def _setup_seeds(hex_str): # pragma: main """This function should only be called from main. Be careful with this function as it manipulates the global random streams. This is part of the general experiment setup before a study. If torch becomes used in any of our optimizers then this will need to come back, could also do TF seed init. ``` torch.manual_seed(random_seed(master_stream)) if torch.cuda.is_available(): torch.cuda.manual_seed(random_seed(master_stream)) ``` """ # Set all random seeds: avoid correlated streams ==> must use diff seeds. 
# Could use UUID class, but more direct to just convert the hex to py int. # pyrandom is better for master because it is not limited to 32-bit seeds. master_stream = pyrandom.Random(int(hex_str, 16)) pyrandom.seed(random_seed(master_stream)) np.random.seed(random_seed(master_stream)) def experiment_main(opt_class, args=None): # pragma: main """This is in effect the `main` routine for this experiment. However, it is called from the optimizer wrapper file so the class can be passed in. The optimizers are assumed to be outside the package, so the optimizer class can't be named from inside the main function without using hacky stuff like `eval`. """ if args is None: description = "Run a study with one benchmark function and an optimizer" args = cmd.parse_args(cmd.experiment_parser(description)) args[CmdArgs.opt_rev] = opt_class.get_version() run_uuid = uuid.UUID(args[CmdArgs.uuid]) logging.captureWarnings(True) # Setup logging to both a file and stdout (if verbose is set to True) logger.setLevel(logging.INFO) # Note this is the module-wide logger logfile = XRSerializer.logging_path(args[CmdArgs.db_root], args[CmdArgs.db], run_uuid) logger_file_handler = logging.FileHandler(logfile, mode="w") logger.addHandler(logger_file_handler) if args[CmdArgs.verbose]: logger.addHandler(logging.StreamHandler()) warnings_logger = logging.getLogger("py.warnings") warnings_logger.addHandler(logger_file_handler) if args[CmdArgs.verbose]: warnings_logger.addHandler(logging.StreamHandler()) logger.info("running: %s" % str(cmd.serializable_dict(args))) logger.info("cmd: %s" % cmd.cmd_str()) assert ( args[CmdArgs.metric] in METRICS_LOOKUP[get_problem_type(args[CmdArgs.data])] ), "reg/clf metrics can only be used on compatible dataset" # Setup random streams for computing the signature, must use same seed # across all runs to ensure signature is consistent. 
This seed is random: _setup_seeds("7e9f2cabb0dd4f44bc10cf18e440b427") # pragma: allowlist secret signature = get_objective_signature( args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric], data_root=args[CmdArgs.data_root] ) logger.info("computed signature: %s" % str(signature)) opt_kwargs = load_optimizer_kwargs(args[CmdArgs.optimizer], args[CmdArgs.optimizer_root]) # Setup the call back for intermediate logging if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]): warnings.warn("Baselines not found. Will not log intermediate scores.") callback = None else: test_case_str = SklearnModel.test_case_str(args[CmdArgs.classifier], args[CmdArgs.data], args[CmdArgs.metric]) optimizer_str = str_join_safe(ARG_DELIM, (args[CmdArgs.optimizer], args[CmdArgs.opt_rev], args[CmdArgs.rev])) baseline_ds, baselines_meta = XRSerializer.load_derived( args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE ) # Check the objective function signatures match in the baseline file sig_errs, _ = analyze_signature_pair({test_case_str: signature[1]}, baselines_meta["signature"]) logger.info("Signature errors:\n%s" % sig_errs.to_string()) print(json.dumps({"exp sig errors": sig_errs.T.to_dict()})) def log_mean_score_json(evals, iters): assert evals.shape == (len(OBJECTIVE_NAMES),) assert not np.any(np.isnan(evals)) log_msg = { cc.TEST_CASE: test_case_str, cc.METHOD: optimizer_str, cc.TRIAL: args[CmdArgs.uuid], cc.ITER: iters, } for idx, obj in enumerate(OBJECTIVE_NAMES): assert OBJECTIVE_NAMES[idx] == obj # Extract relevant rescaling info slice_ = {cc.TEST_CASE: test_case_str, OBJECTIVE: obj} best_opt = baseline_ds[cc.PERF_BEST].sel(slice_, drop=True).values.item() base_clip_val = baseline_ds[cc.PERF_CLIP].sel(slice_, drop=True).values.item() # Perform the same rescaling as found in experiment_analysis.compute_aggregates() score = linear_rescale(evals[idx], best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False) # Also, clip the score 
def _get_opt_class(opt_name):
    """Load the relevant wrapper class based on this optimizer name.

    This is inherently a bit ugly, but it is only called at the `main()` level before the inner workings get going.
    There are a few ways to do this, each with some pros and cons:

    1) The way done here: based on the filename, load that module via conditional imports and if-else.
       cons:
         - uses conditional imports
         - must manually repeat yourself in the if-else, but these are checked in unit testing
    2) Import everything and then pick the right optimizer based on a dict of name_str -> class.
       cons:
         - loads every dependency no matter which is used so could be slow
         - also a stupid dependency might change global state in a way that corrupts experiments
    3) Use the wrapper file as the entry point and add that to setup.py.
       cons:
         - Will clutter the CLI namespace with one command for each wrapper
    4) Use importlib to import the specified file.
       cons:
         - Makes assumptions about relative path structure. For pip-installed packages, probably safer to let
           python find the file via import.

    Option (1) seems least objectionable. However, this function could easily be switched to use importlib without
    any changes elsewhere.

    Parameters
    ----------
    opt_name : str
        Name of the optimizer, used as the key into `CONFIG`. The first element of the `CONFIG` entry is the file
        name of the wrapper module.

    Returns
    -------
    opt_class
        The ``opt_wrapper`` attribute of the selected built-in wrapper module.

    Raises
    ------
    AssertionError
        If `CONFIG` maps `opt_name` to a wrapper file that is not handled below.
    """
    wrapper_file, _ = CONFIG[opt_name]

    if wrapper_file == "hyperopt_optimizer.py":
        import bayesmark.builtin_opt.hyperopt_optimizer as opt
    elif wrapper_file == "nevergrad_optimizer.py":
        import bayesmark.builtin_opt.nevergrad_optimizer as opt
    elif wrapper_file == "opentuner_optimizer.py":
        import bayesmark.builtin_opt.opentuner_optimizer as opt
    elif wrapper_file == "pysot_optimizer.py":
        import bayesmark.builtin_opt.pysot_optimizer as opt
    elif wrapper_file == "random_optimizer.py":
        import bayesmark.builtin_opt.random_optimizer as opt
    elif wrapper_file == "scikit_optimizer.py":
        import bayesmark.builtin_opt.scikit_optimizer as opt
    else:
        # Raise explicitly instead of `assert False`: assert statements are removed under `python -O`, which would
        # let execution fall through to `opt.opt_wrapper` and die with a confusing UnboundLocalError.
        raise AssertionError(
            "CONFIG for built in optimizers has added a new optimizer, but not updated this function."
        )

    opt_class = opt.opt_wrapper
    return opt_class
def main():  # pragma: main
    """Command line entry point that runs a single study.

    Parses the experiment command line arguments, resolves the built-in optimizer wrapper class named by the
    optimizer argument, and hands both to `experiment_main`. Usually called by the experiment launcher.
    """
    parser = cmd.experiment_parser("Run a study with one benchmark function and an optimizer")
    args = cmd.parse_args(parser)

    experiment_main(_get_opt_class(args[CmdArgs.optimizer]), args=args)
def validate_time(all_time):
    """Validate a timing data set.

    Parameters
    ----------
    all_time : :class:`xarray:xarray.Dataset`
        Dataset whose variables `SUGGEST_PHASE`, `EVAL_PHASE` and `OBS_PHASE` must have dimensions ``(ITER,)``,
        ``(ITER, SUGGEST)`` and ``(ITER,)``, respectively, with simple coordinates.
    """
    assert isinstance(all_time, xr.Dataset)

    assert all_time[cc.SUGGEST_PHASE].dims == (ITER,)
    assert all_time[cc.EVAL_PHASE].dims == (ITER, SUGGEST)
    assert all_time[cc.OBS_PHASE].dims == (ITER,)
    assert xru.is_simple_coords(all_time.coords, min_side=1)


def validate_perf(perf_da):
    """Validate the input eval data arrays.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Evaluation results with dimensions ``(ITER, SUGGEST)``, simple coordinates, and no nan values.
    """
    # This must be a DataArray (not a Dataset): the checks below compare `.dims` against a tuple and treat `.values`
    # as an array, which only holds for a DataArray. A Dataset has mapping-style `.dims` and `.values` is a method.
    assert isinstance(perf_da, xr.DataArray)

    assert perf_da.dims == (ITER, SUGGEST)
    assert xru.is_simple_coords(perf_da.coords)
    assert not np.any(np.isnan(perf_da.values))


def validate_agg_perf(perf_da, min_trial=1):
    """Validate the aggregated eval data set.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregated evaluation results with dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and no nan
        values. The `ITER`, `SUGGEST`, and `TRIAL` coordinates must be simple.
    min_trial : int
        Minimum size required along the `TRIAL` dimension.
    """
    assert isinstance(perf_da, xr.DataArray)

    assert perf_da.dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    assert xru.is_simple_coords(perf_da.coords, dims=(ITER, SUGGEST, TRIAL))
    assert not np.any(np.isnan(perf_da.values))
    assert perf_da.sizes[TRIAL] >= min_trial
def summarize_time(all_time):
    """Collapse the timing dataset of one experiment into per-iteration quantities suited for aggregation.

    Parameters
    ----------
    all_time : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE)`` which have dimensions ``(ITER,)``,
        ``(ITER, SUGGEST)``, and ``(ITER,)``, respectively. The variable `EVAL_PHASE` has the function evaluation
        time for each parallel suggestion.

    Returns
    -------
    time_summary : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(SUGGEST_PHASE, OBS_PHASE, EVAL_PHASE_MAX, EVAL_PHASE_SUM)`` which all have
        dimensions ``(ITER,)``. The maximum `EVAL_PHASE_MAX` is relevant for wall clock time, while
        `EVAL_PHASE_SUM` is relevant for CPU time.
    """
    validate_time(all_time)

    eval_time = all_time[cc.EVAL_PHASE]

    time_summary = xr.Dataset(coords=all_time.coords)
    # These two phases are already per-iteration, so they are carried over unchanged
    for phase in (cc.SUGGEST_PHASE, cc.OBS_PHASE):
        time_summary[phase] = all_time[phase]
    # The eval phase is reduced over the parallel suggestions in two different ways
    time_summary[cc.EVAL_PHASE_MAX] = eval_time.max(dim=SUGGEST)
    time_summary[cc.EVAL_PHASE_SUM] = eval_time.sum(dim=SUGGEST)
    return time_summary
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Datasets.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments`
        is a pair containing ``(meta_data, data)``. See `load_experiments` for details on these variables.
    ravel : bool
        Deprecated. If true, a `NotImplementedError` is raised as soon as the first experiment is processed.

    Returns
    -------
    all_perf : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `perf_ds` from the experiments. The meta-data from the experiments are included
        as extra dimensions. Each variable in `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``.
        To convert the `uuid` to a trial, there must be an equal number of repetitions in the experiments for each
        `TEST_CASE`, `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and
        `SUGGEST`.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing the output of `summarize_time` applied to each `time_ds` from the experiments. Each
        variable has dimensions ``(ITER, TEST_CASE, METHOD, TRIAL)``.
    all_suggest : dict(str, :class:`xarray:xarray.Dataset`)
        Dictionary mapping each test case to a Dataset containing all of the `suggest_ds` from the experiments on
        that test case. Each variable has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` where the
        `TEST_CASE` dimension is singleton.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures, grouped by test case.
    """
    all_perf = {}
    all_time = {}
    all_suggest = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, uuid), (perf_ds, time_ds, suggest_ds, sig) in all_experiments:
        if ravel:
            raise NotImplementedError("ravel is deprecated. Just reshape in analysis steps instead.")

        # The trial index is simply the order in which runs of this (test case, optimizer) pair are encountered
        case_key = (test_case, optimizer, trial_counter[(test_case, optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert all(perf_ds[kk].dims == (ITER, SUGGEST) for kk in perf_ds)
        all_perf[case_key] = perf_ds

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Process suggestion data, these are grouped by test case
        all_suggest_curr = all_suggest.setdefault(test_case, {})
        all_suggest_curr[case_key] = suggest_ds

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    # Every (test case, optimizer) pair must have been run the same number of times
    assert min(trial_counter.values()) == max(trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.ds_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_perf[kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL) for kk in all_perf)
    assert not any(
        np.any(np.isnan(all_perf[kk].values)) for kk in all_perf
    ), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL) for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time), (ITER, TEST_CASE, METHOD, TRIAL))

    # The suggestions are concatenated separately for each test case
    for test_case in all_suggest:
        all_suggest[test_case] = xru.ds_concat(all_suggest[test_case], dims=(TEST_CASE, METHOD, TRIAL))
        assert all(
            all_suggest[test_case][kk].dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
            for kk in all_suggest[test_case]
        )
        assert not any(np.any(np.isnan(all_suggest[test_case][kk].values)) for kk in all_suggest[test_case])
        assert xru.coord_compat((all_perf, all_suggest[test_case]), (ITER, METHOD, TRIAL))
        assert all_suggest[test_case].coords[TEST_CASE].shape == (1,), "test case should be singleton"

    return all_perf, all_time, all_suggest, all_sigs
def load_experiments(uuid_list, db_root, dbid):  # pragma: io
    """Generator to load the results of the experiments.

    Parameters
    ----------
    uuid_list : list(uuid.UUID)
        List of UUIDs corresponding to experiments to load.
    db_root : str
        Root location for data store as requested by the serializer used.
    dbid : str
        Name of the data store as requested by the serializer used.

    Yields
    ------
    meta_data : (str, str, str)
        The `meta_data` contains a `tuple` of `str` with ``test_case, optimizer, uuid``.
    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, list(float))
        The `data` contains a tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is a
        :class:`xarray:xarray.Dataset` containing the evaluation results with dimensions ``(ITER, SUGGEST)``, each
        variable is an objective. The `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of
        the form accepted by `summarize_time`. The coordinates must be compatible with `perf_ds`. The `suggest_ds`
        is a :class:`xarray:xarray.Dataset` containing the inputs to the function evaluations. Each variable is a
        function input. Finally, `sig` contains the `test_case` signature and must be `list(float)`.
    """
    uuids_seen = set()
    for uuid_ in uuid_list:
        logger.info(uuid_.hex)

        # Load perf, timing, and suggestion data. All three files of a run must carry the same meta-data.
        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)
        time_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between time and eval files"
        suggest_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)
        assert meta == meta_t, "meta data should match between suggest and eval files"

        # Get signature to pass out as well
        _, sig = meta["signature"]
        logger.info(meta)
        logger.info(sig)

        # Build the new indices for combined data, this could be put in function for easier testing
        eval_args = unserializable_dict(meta["args"])  # Unpack meta-data
        test_case = SklearnModel.test_case_str(
            eval_args[CmdArgs.classifier], eval_args[CmdArgs.data], eval_args[CmdArgs.metric]
        )
        optimizer = str_join_safe(
            ARG_DELIM, (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])
        )
        args_uuid = eval_args[CmdArgs.uuid]

        # Check UUID sanity
        assert isinstance(args_uuid, str)
        assert args_uuid == uuid_.hex, "UUID meta-data does not match filename"
        assert args_uuid not in uuids_seen, "uuids being reused between studies"
        uuids_seen.add(args_uuid)

        # Return key -> data so this generator can be iterated over in dict like manner
        meta_data = (test_case, optimizer, args_uuid)
        data = (perf_ds, time_ds, suggest_ds, sig)
        yield meta_data, data
def main():
    """Aggregate the stored results of all studies in a data store.

    Loads every experiment found in the data store, concatenates them into combined datasets, checks the
    signatures, and saves the combined results as derived files. See README for instructions on calling aggregate.
    """
    args = parse_args(agg_parser("Aggregate study results across functions and optimizers"))
    db_root, dbid = args[CmdArgs.db_root], args[CmdArgs.db]

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # Get list of UUIDs, the eval results are the reference that the other result types must agree with
    uuid_list = XRSerializer.get_uuids(db_root, db=dbid, key=cc.EVAL)
    other_results = (
        (cc.TIME, "UUID list does not match between time and eval results"),
        (cc.SUGGEST_LOG, "UUID list does not match between suggest log and eval results"),
    )
    for result_key, mismatch_msg in other_results:
        other_uuids = XRSerializer.get_uuids(db_root, db=dbid, key=result_key)
        assert uuid_list == other_uuids, mismatch_msg

    # Get iterator of all experiment data dumps, load in and process, and concat
    experiments = load_experiments(uuid_list, db_root, dbid)
    all_perf, all_time, all_suggest, all_sigs = concat_experiments(experiments, ravel=args[CmdArgs.ravel])

    # Check that the concatenated signatures are coherent
    sig_errs, signatures_median = analyze_signatures(all_sigs)
    logger.info("Signature errors:\n%s" % sig_errs.to_string())
    print(json.dumps({"exp-agg sig errors": sig_errs.T.to_dict()}))

    # Dump and save it all out
    logger.info("saving")
    meta = {"args": serializable_dict(args), "signature": signatures_median}
    for derived_key, derived_ds in ((EVAL_RESULTS, all_perf), (TIME_RESULTS, all_time)):
        XRSerializer.save_derived(derived_ds, meta, db_root, db=dbid, key=derived_key)
    # The suggestions get one derived file per test case
    for test_case, suggest_ds in all_suggest.items():
        XRSerializer.save_derived(suggest_ds, meta, db_root, db=dbid, key=test_case)

    logger.info("done")
def get_perf_array(evals, evals_visible):
    """Get the actual (e.g., generalization loss) over iterations.

    Parameters
    ----------
    evals : :class:`numpy:numpy.ndarray` of shape (n_iter, n_batch, n_trials)
        The actual loss (e.g., generalization) for a given experiment.
    evals_visible : :class:`numpy:numpy.ndarray` of shape (n_iter, n_batch, n_trials)
        The observable loss (e.g., validation) for a given experiment.

    Returns
    -------
    perf_array : :class:`numpy:numpy.ndarray` of shape (n_iter, n_trials)
        The best performance so far at iteration i from `evals`. Where the best has been selected according to
        `evals_visible`.
    """
    n_iter, _, n_trials = evals.shape
    reduced_shape = (n_iter, n_trials)

    assert evals.size > 0, "perf array not supported for empty arrays"
    assert evals_visible.shape == evals.shape
    assert not np.any(np.isnan(evals))
    assert not np.any(np.isnan(evals_visible))

    # Within each batch, find the suggestion with the lowest visible loss and pick out its actual loss
    best_in_batch = np.argmin(evals_visible, axis=1)
    picked_evals = np.take_along_axis(evals, best_in_batch[:, np.newaxis, :], axis=1)[:, 0, :]
    assert picked_evals.shape == reduced_shape

    # The lowest visible loss within each batch is what determines the selection over iterations
    visible_best = evals_visible.min(axis=1)
    assert visible_best.shape == reduced_shape

    # Get the minimum from the visible loss
    return cummin(picked_evals, visible_best)
def compute_aggregates(perf_da, baseline_ds, visible_perf_da=None):
    """Aggregate function evaluations in the experiments to get performance summaries of each method.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments according to true loss
        (e.g., generalization). `perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed
        to have no nan values.
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``,
        respectively. `PERF_MED` is a baseline of performance based on random search when using medians to summarize
        performance. Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upperbound to clip poor performance when
        using the mean. `PERF_BEST` is an estimate on the global minimum.
    visible_perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments according to visible loss
        (e.g., validation). `visible_perf_da` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is
        assumed to have no nan values. If `None`, we set ``visible_perf_da = perf_da``.

    Returns
    -------
    agg_result : :class:`xarray:xarray.Dataset`
        Dataset with summary of performance for each method and test case combination. Contains variables:
        ``(PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD, TEST_CASE)``. `PERF_MED` is a median summary of performance with
        `LB_MED` and `UB_MED` as error bars. `NORMED_MED` is a rescaled `PERF_MED` so we expect the optimal
        performance is 0, and random search gives 1 at all `ITER`. Likewise, `PERF_MEAN`, `LB_MEAN`, `UB_MEAN`,
        `NORMED_MEAN` are for mean performance.
    summary : :class:`xarray:xarray.Dataset`
        Dataset with overall summary of performance of each method. Contains variables
        ``(PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN)``
        each with dimensions ``(ITER, METHOD)``. The ``*NORMED_MEAN`` variables are the corresponding ``*MEAN``
        variables divided by the rescaled mean baseline averaged over test cases.
    """
    validate_agg_perf(perf_da, min_trial=1)

    assert isinstance(baseline_ds, xr.Dataset)
    assert tuple(baseline_ds[PERF_BEST].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_CLIP].dims) == (TEST_CASE,)
    assert tuple(baseline_ds[PERF_MED].dims) == (ITER, TEST_CASE)
    assert tuple(baseline_ds[PERF_MEAN].dims) == (ITER, TEST_CASE)
    assert xru.coord_compat((perf_da, baseline_ds), (ITER, TEST_CASE))
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)

    # Now actually get the aggregate performance numbers per test case
    agg_result = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, NORMED_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN),
        (ITER, METHOD, TEST_CASE),
    )
    # Rescaled mean baseline per test case, only needed to build the normalizer of the summary below
    baseline_mean_da = xru.only_dataarray(xru.ds_like(perf_da, ["ref"], (ITER, TEST_CASE)))
    # Using values here since just clearer to get raw items than xr object for func_name
    for func_name in perf_da.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        # Sanity check the baseline before it is used for rescaling
        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(rand_perf_med > best_opt)
        assert np.all(rand_perf_mean > best_opt)
        assert np.all(rand_perf_mean <= base_clip_val)

        baseline_mean_da.loc[{TEST_CASE: func_name}] = linear_rescale(
            rand_perf_mean, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False
        )
        for method_name in perf_da.coords[METHOD].values:
            # Take the minimum over all suggestion at given iter + sanity check perf_da
            curr_da = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)
            assert curr_da.dims == (ITER, SUGGEST, TRIAL)

            if visible_perf_da is None:
                perf_array = get_perf_array(curr_da.values, curr_da.values)

                # Cross-check: when the visible loss is the true loss this must be a plain cumulative minimum
                curr_da_ = perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True).min(dim=SUGGEST)
                assert curr_da_.dims == (ITER, TRIAL)
                perf_array_ = np.minimum.accumulate(curr_da_.values, axis=0)
                assert np.allclose(perf_array, perf_array_)
            else:
                curr_visible_da = visible_perf_da.sel({METHOD: method_name, TEST_CASE: func_name}, drop=True)
                assert curr_visible_da.dims == (ITER, SUGGEST, TRIAL)

                perf_array = get_perf_array(curr_da.values, curr_visible_da.values)

            # Compute median perf and CI on it
            med_perf, LB, UB = qt.quantile_and_CI(perf_array, EVAL_Q, alpha=ALPHA)
            assert med_perf.shape == rand_perf_med.shape
            agg_result[PERF_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = med_perf
            agg_result[LB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = LB
            agg_result[UB_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = UB

            # Now store normed version, which is better for aggregation
            normed = linear_rescale(med_perf, best_opt, rand_perf_med, 0.0, 1.0, enforce_bounds=False)
            agg_result[NORMED_MED].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Store normed mean version
            normed = linear_rescale(perf_array, best_opt, base_clip_val, 0.0, 1.0, enforce_bounds=False)
            # Also, clip the score from below at -1 to limit max influence of single run on final average
            normed = np.clip(normed, -1.0, 1.0)
            normed = np.mean(normed, axis=1)
            agg_result[NORMED_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = normed

            # Compute mean perf and CI on it, the performance is clipped from above at the clip value first
            perf_array = np.minimum(base_clip_val, perf_array)
            mean_perf = np.mean(perf_array, axis=1)
            assert mean_perf.shape == rand_perf_mean.shape
            EB = t_EB(perf_array, alpha=ALPHA, axis=1)
            assert EB.shape == rand_perf_mean.shape
            agg_result[PERF_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf
            agg_result[LB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf - EB
            agg_result[UB_MEAN].loc[{TEST_CASE: func_name, METHOD: method_name}] = mean_perf + EB
    assert not any(np.any(np.isnan(agg_result[kk].values)) for kk in agg_result)

    # Compute summary score over all test cases, summarize performance of each method
    summary = xru.ds_like(
        perf_da,
        (PERF_MED, LB_MED, UB_MED, PERF_MEAN, LB_MEAN, UB_MEAN, NORMED_MEAN, LB_NORMED_MEAN, UB_NORMED_MEAN),
        (ITER, METHOD),
    )
    # Note the summary is built from the normed per test case scores, the TEST_CASE dimension is reduced away
    summary[PERF_MED], summary[LB_MED], summary[UB_MED] = xr.apply_ufunc(
        qt.quantile_and_CI,
        agg_result[NORMED_MED],
        input_core_dims=[[TEST_CASE]],
        kwargs={"q": EVAL_Q, "alpha": ALPHA},
        output_core_dims=[[], [], []],
    )

    summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
    EB = xr.apply_ufunc(t_EB, agg_result[NORMED_MEAN], input_core_dims=[[TEST_CASE]])
    summary[LB_MEAN] = summary[PERF_MEAN] - EB
    summary[UB_MEAN] = summary[PERF_MEAN] + EB

    # Divide by the rescaled mean baseline averaged over test cases
    normalizer = baseline_mean_da.mean(dim=TEST_CASE)
    summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
    summary[LB_NORMED_MEAN] = summary[LB_MEAN] / normalizer
    summary[UB_NORMED_MEAN] = summary[UB_MEAN] / normalizer

    assert all(tuple(summary[kk].dims) == (ITER, METHOD) for kk in summary)
    return agg_result, summary
def main():
    """Analyze the aggregated study results and save the performance summaries.

    Loads the aggregated eval results and the baseline (building the baseline first if it is not found), computes
    the aggregate scores for each objective, prints and logs the final scores, and saves the results as derived
    files. See README for instructions on calling analysis.
    """
    description = "Analyze results from aggregated studies"
    args = parse_args(general_parser(description))

    # Metric used on leaderboard
    leaderboard_metric = cc.VISIBLE_TO_OPT

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # Load in the eval data and sanity check
    perf_ds, meta = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL_RESULTS)
    logger.info("Meta data from source file: %s" % str(meta["args"]))

    # Check if there is a baselines file, otherwise make one
    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root], db=args[CmdArgs.db]):
        warnings.warn("Baselines not found. Need to construct baseline.")
        do_baseline(args)

    # Load in baseline scores data and sanity check (including compatibility with eval data)
    baseline_ds, meta_ref = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)
    logger.info("baseline data from source ref file: %s" % str(meta_ref["args"]))

    # Check test case signatures match between eval data and baseline data
    sig_errs, signatures = analyze_signature_pair(meta["signature"], meta_ref["signature"])
    logger.info("Signature errors:\n%s" % sig_errs.to_string())
    print(json.dumps({"exp-anal sig errors": sig_errs.T.to_dict()}))

    # Subset baseline to only the test cases run in the experiments
    test_cases_run = perf_ds.coords[TEST_CASE].values.tolist()
    assert set(test_cases_run) <= set(
        baseline_ds.coords[TEST_CASE].values.tolist()
    ), "Data set contains test cases not found in baseline."
    baseline_ds = baseline_ds.sel({TEST_CASE: test_cases_run})

    # Also subset to allow shorter runs
    iters_run = perf_ds.coords[ITER].values.tolist()
    assert set(iters_run) <= set(
        baseline_ds.coords[ITER].values.tolist()
    ), "Data set not same batch size or too many iters compared to baseline."
    baseline_ds = baseline_ds.sel({ITER: iters_run})

    # Do the actual computation, every objective is scored but selection always uses the visible objective
    perf_visible = perf_ds[cc.VISIBLE_TO_OPT]
    agg_result = OrderedDict()
    summary = OrderedDict()
    for metric_for_scoring in sorted(perf_ds):
        perf_da = perf_ds[metric_for_scoring]
        baseline_ds_ = baseline_ds.sel({OBJECTIVE: metric_for_scoring}, drop=True)
        agg_result[(metric_for_scoring,)], summary[(metric_for_scoring,)] = compute_aggregates(
            perf_da, baseline_ds_, perf_visible
        )
    agg_result = xru.ds_concat(agg_result, dims=(cc.OBJECTIVE,))
    summary = xru.ds_concat(summary, dims=(cc.OBJECTIVE,))

    for metric_for_scoring in sorted(perf_ds):
        # Print summary by problem
        # Recall that:
        # ... summary[PERF_MEAN] = agg_result[NORMED_MEAN].mean(dim=TEST_CASE)
        # ... summary[NORMED_MEAN] = summary[PERF_MEAN] / normalizer
        # Where normalizer is constant across all problems, optimizers
        print("Scores by problem (JSON):\n")
        agg_df = agg_result[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}].to_pandas().T
        print(json.dumps({metric_for_scoring: agg_df.to_dict()}))
        print("\n")

        final_score = summary[PERF_MED].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("median score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))
        final_score = summary[PERF_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("mean score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))

        # Note: the final scores printed as JSON are the mean scores (PERF_MEAN) at the last iteration
        print("Final scores (JSON):\n")
        print(json.dumps({metric_for_scoring: final_score.to_series().to_dict()}))
        print("\n")

        final_score = summary[NORMED_MEAN].sel({cc.OBJECTIVE: metric_for_scoring}, drop=True)[{ITER: -1}]
        logger.info("normed mean score @ %d:\n%s" % (summary.sizes[ITER], xru.da_to_string(final_score)))

    # Now saving results
    meta = {"args": serializable_dict(args), "signature": signatures}
    XRSerializer.save_derived(agg_result, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.PERF_RESULTS)
    XRSerializer.save_derived(summary, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.MEAN_SCORE)

    # The leaderboard score is based on the mean score of the leaderboard metric at the last iteration
    final_msg = xru.da_to_string(
        100 * (1.0 - summary[PERF_MEAN].sel({cc.OBJECTIVE: leaderboard_metric}, drop=True)[{ITER: -1}])
    )
    logger.info("-" * 20)
    logger.info("Final score `100 x (1-loss)` for leaderboard:\n%s" % final_msg)
def validate(baseline_ds):
    """Perform the same checks on the baseline as will happen in analysis.

    For every test case, the median and mean baselines must be non-increasing over iterations and strictly above
    the best value (`PERF_BEST`), and the mean baseline must not exceed the clip value (`PERF_CLIP`).

    Parameters
    ----------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with variables ``(PERF_MED, PERF_MEAN, PERF_BEST, PERF_CLIP)`` and a `TEST_CASE` coordinate.
    """
    baseline_vars = (PERF_MED, PERF_MEAN, PERF_BEST, PERF_CLIP)
    for test_case in baseline_ds.coords[TEST_CASE].values:
        case_sel = {TEST_CASE: test_case}
        med_curve, mean_curve, best_val, clip_val = (
            baseline_ds[var].sel(case_sel, drop=True).values for var in baseline_vars
        )

        assert np.all(np.diff(med_curve) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(mean_curve) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(med_curve > best_val)
        assert np.all(mean_curve > best_val)
        assert np.all(mean_curve <= clip_val)
def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. It has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound to clip poor performance when using the
        mean. `PERF_BEST` is an estimate on the global minimum.
    """
    validate_agg_perf(perf_da)

    # The reference methods are all methods whose name starts with the random search prefix
    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        # Pool every random search evaluation on this test case into one flat sample
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        # Clip the random search sample at the clip level before taking expectations
        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)

        # The clipping and cumulative minimum below are expected to be no-ops; the asserts confirm that
        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed
    # Every entry must have been filled in by the loop above
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds


def do_baseline(args):  # pragma: io
    """Alternate entry into the program without calling the actual main.

    Loads the `cc.EVAL_RESULTS` data, computes one baseline per variable in it, and saves the combined result under
    `cc.BASELINE`.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Parsed command line arguments. Only `CmdArgs.db_root` and `CmdArgs.db` are read here.
    """
    # Load in the eval data and sanity check
    perf_ds, meta = XRSerializer.load_derived(args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.EVAL_RESULTS)
    logger.info("Meta data from source file: %s" % str(meta["args"]))

    # One baseline per variable, keyed by a 1-tuple so it can be concatenated along cc.OBJECTIVE
    D = OrderedDict()
    for kk in perf_ds:
        perf_da = perf_ds[kk]
        D[(kk,)] = compute_baseline(perf_da)
    baseline_ds = ds_concat(D, dims=(cc.OBJECTIVE,))

    # Keep in same order for cleanliness
    baseline_ds = baseline_ds.sel({cc.OBJECTIVE: list(perf_ds)})
    assert list(perf_ds) == baseline_ds.coords[cc.OBJECTIVE].values.tolist()

    # Could optionally remove this once we think things have enough tests
    for kk in D:
        assert baseline_ds.sel({cc.OBJECTIVE: kk[0]}, drop=True).identical(D[kk])

    # Now dump the results
    XRSerializer.save_derived(baseline_ds, meta, args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)


def main():
    """Command line entry point for the baseline step. See README for instructions on calling baseline.
    """
    description = "Aggregate the baselines for later analysis in benchmark"
    args = parse_args(general_parser(description))

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    do_baseline(args)
    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main
def main():
    """Command line entry point that initializes the experiment data base. See README for calling db_init.

    Parses the general command line arguments, refuses to run in dry-run mode, and then hands the data base root,
    data base name, and the `EXP_VARS` keys to :meth:`XRSerializer.init_db`.
    """
    parser = cmd.general_parser("Initialize the directories for running the experiments")
    args = cmd.parse_args(parser)

    assert not args[CmdArgs.dry_run], "Dry run doesn't make any sense when building dirs"

    # Note this is the module-wide logger
    logger.setLevel(logging.INFO)
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    db_root, db_name = args[CmdArgs.db_root], args[CmdArgs.db]
    XRSerializer.init_db(db_root, db=db_name, keys=EXP_VARS, exist_ok=EXIST_OK)

    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main
""" import json import logging import random as pyrandom import uuid as pyuuid import warnings from itertools import product from subprocess import TimeoutExpired, call import numpy as np import bayesmark.cmd_parse as cmd from bayesmark.builtin_opt.config import CONFIG from bayesmark.cmd_parse import CMD_STR, CmdArgs, serializable_dict from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, EXP_VARS, METRICS, MODEL_NAMES, PY_INTERPRETER from bayesmark.data import METRICS_LOOKUP, get_problem_type from bayesmark.np_util import random as np_random from bayesmark.np_util import random_seed, strat_split from bayesmark.path_util import absopen from bayesmark.serialize import XRSerializer from bayesmark.util import range_str, shell_join, str_join_safe, strict_sorted # How much of uuid to put in job name to avoid name clashes UUID_JOB_CHARS = 7 # Warning: this name is also specified in setup.py, and violates the DRY principle. So if it gets changed in setup.py, # it must also be changed here! EXPERIMENT_ENTRY = "bayesmark-exp" logger = logging.getLogger(__name__) def _is_arg_safe(ss): """Check if `str` is safe as argument to `argparse`.""" if len(ss) == 0: return False safe = ss[0] != "-" return safe def arg_safe_str(val): """Cast value as `str`, raise error if not safe as argument to `argparse`.""" ss = str(val) if not _is_arg_safe(ss): raise ValueError("%s is not safe for argparse" % ss) return ss def gen_commands(args, opt_file_lookup, run_uuid): """Generator providing commands to launch processes for experiments. Parameters ---------- args : dict(CmdArgs, [int, str]) Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments passed to this program. opt_file_lookup : dict(str, str) Mapping from method name to filename containing wrapper class for the method. run_uuid : uuid.UUID UUID for this launcher run. Needed to generate different experiments UUIDs on each call. 
def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer, metric)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing command and arguments to run a process with experiment. Join with whitespace or use
        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means
        it will default to ``.`` if the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.
    """
    args_to_pass_thru = [CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db]
    # This could be made simpler and avoid if statement if we just always pass dataroot, even if no custom data used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None
        else args[CmdArgs.optimizer]
    )
    assert all(
        ((optimizer in opt_file_lookup) or (optimizer in CONFIG)) for optimizer in o_list
    ), "unknown optimizer in optimizer list"

    # For each problem type, the requested metrics that apply to it (in sorted order)
    m_set = set(METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {problem_type: sorted(m_set.intersection(mm)) for problem_type, mm in METRICS_LOOKUP.items()}
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list
    ), "At one metric needed for each problem type of data sets"

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list, o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible UUID string (conditioned on having the same run uuid), but one that should also
            # never give a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the argument list for subproc, passing some args thru
            cmd_args_pass_thru = [[CMD_STR[vv][0], arg_safe_str(args[vv])] for vv in args_to_pass_thru]
            # Technically, the optimizer is not actually needed here for non-built in optimizers because it is
            # already specified via the entry point: optimizer_wrapper_file
            cmd_args = [
                [CMD_STR[CmdArgs.classifier][0], arg_safe_str(classifier)],
                [CMD_STR[CmdArgs.data][0], arg_safe_str(data)],
                [CMD_STR[CmdArgs.optimizer][0], arg_safe_str(optimizer)],
                [CMD_STR[CmdArgs.uuid][0], arg_safe_str(sub_uuid)],
                [CMD_STR[CmdArgs.metric][0], arg_safe_str(metric)],
            ]
            # Flatten the [option, value] pairs into one flat tuple of strings
            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY,)
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(".py"), "optimizer wrapper should a be .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safe again: odd elements in the list (the values) need to be arg safe, while the even
            # elements (the option strings) must not be, i.e., they are empty or start with a dash.
            assert all((_is_arg_safe(ss) == (ii % 2 == 1)) for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd
def dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):
    """Write to a buffer the jobs (groups of shell commands) that together run all the experiments.

    The commands come from :func:`gen_commands`, are grouped by optimizer, and are then spread over the jobs with
    :func:`strat_split`. Nothing is executed here.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Its hex prefix is used in the job names and it is forwarded to
        :func:`gen_commands`.
    fp : writable buffer
        Handle that receives two comment lines for reproducibility followed by one line per job. A handle is taken
        (rather than a file name) so this can be tested without an actual file.
    random : RandomState
        Random stream to use for reproducibility.
    """
    assert args[CmdArgs.n_jobs] > 0, "Must have non-zero jobs for dry run"

    # Surface whatever the serializer reports about manual set up as a warning to the user
    setup_info = XRSerializer.init_db_manual(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS)
    warnings.warn(setup_info, UserWarning)

    # Collect the shell-ready command strings, grouped by optimizer
    commands_by_opt = {}
    for (_, _, _, optimizer, _), full_cmd in gen_commands(args, opt_file_lookup, run_uuid):
        bucket = commands_by_opt.get(optimizer)
        if bucket is None:
            bucket = commands_by_opt[optimizer] = []
        bucket.append(shell_join(full_cmd))

    groups = list(commands_by_opt.values())
    all_commands = [cmd_str for group in groups for cmd_str in group]

    # Never use more jobs than there are commands, an empty job would be a waste
    n_jobs = min(args[CmdArgs.n_jobs], len(all_commands))

    # Would prob also work with pyrandom, but only tested np random so far
    subcommands = strat_split(groups, n_jobs, random=random)
    # The split must contain exactly the commands we started with, delete once we trust strat_split
    assert sorted(np.concatenate(subcommands)) == sorted(all_commands)

    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]

    # Comment lines first, so the jobs file documents how it was made
    fp.write("# running: %s\n" % str(serializable_dict(args)))
    fp.write("# cmd: %s\n" % cmd.cmd_str())
    for ii, ii_str in range_str(n_jobs):
        job_commands = subcommands[ii]
        assert len(job_commands) > 0
        fp.write("job_%s_%s %s\n" % (job_suffix, ii_str, " && ".join(job_commands)))
def real_run(args, opt_file_lookup, run_uuid, timeout=None):  # pragma: io
    """Run sequence of independent experiments to fully run the benchmark.

    This uses `subprocess` to launch a separate process (in serial) for each experiment.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program. The entry for `CmdArgs.db` is overwritten with the value returned by
        :meth:`XRSerializer.init_db`.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    timeout : int
        Max seconds per experiment. Use `None` for no limit.

    Raises
    ------
    ChildProcessError
        If an experiment process returns a non-zero status code. A timeout does not raise: it is logged, reported
        as a JSON line on stdout, and the next experiment is started.
    """
    args[CmdArgs.db] = XRSerializer.init_db(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS, exist_ok=True)
    logger.info("Supply --db %s to append to this experiment or reproduce jobs file." % args[CmdArgs.db])

    # Get and run the commands in a sub-process
    counter = 0
    G = gen_commands(args, opt_file_lookup, run_uuid)
    for _, full_cmd in G:
        try:
            status = call(full_cmd, shell=False, cwd=args[CmdArgs.optimizer_root], timeout=timeout)
        except TimeoutExpired:
            logger.info(f"Experiment timeout after {timeout} seconds.")
            print(json.dumps({"experiment_timeout_exception": " ".join(full_cmd)}))
            # A timed out study did not finish, so it must not be counted as a success below
            continue
        if status != 0:
            raise ChildProcessError("status code %d returned from:\n%s" % (status, " ".join(full_cmd)))
        counter += 1
    logger.info(f"Benchmark script ran {counter} studies successfully.")


def main():
    """Command line entry point for the launcher. See README for instructions on calling launcher.

    Parses the launcher arguments, sets up the run UUID (random unless supplied by the user), seeds the python and
    numpy random generators from that UUID, and then either writes a jobs file (dry run) or runs all experiments.
    """
    description = "Launch series of studies across functions and optimizers"
    args = cmd.parse_args(cmd.launcher_parser(description))

    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    # Get optimizer settings, says which file to call for each optimizer
    settings = cmd.load_optimizer_settings(args[CmdArgs.optimizer_root])
    opt_file_lookup = {optimizer: wrapper_file for optimizer, (wrapper_file, _) in settings.items()}

    # Setup uuid
    if args[CmdArgs.uuid] is None:
        args[CmdArgs.uuid] = pyuuid.uuid4().hex  # debatable if uuid1 or uuid4 is better here
    else:
        warnings.warn(
            "User UUID supplied. This is only desired for debugging. Careless use could lead to study id conflicts.",
            UserWarning,
        )
    run_uuid = pyuuid.UUID(hex=args[CmdArgs.uuid])
    # Guards against a user supplied string that is parsed but is not already in canonical hex form
    assert run_uuid.hex == args[CmdArgs.uuid]
    logger.info("Supply --uuid %s to reproduce this run." % run_uuid.hex)

    # Log all the options
    print("Launcher options (JSON):\n")
    print(json.dumps({"bayesmark-launch-args": cmd.serializable_dict(args)}))
    print("\n")

    # Set the master seed (derive from the uuid we just setup)
    pyrandom.seed(run_uuid.int)
    np.random.seed(random_seed(pyrandom))

    # Now run it, either to dry run file or executes sub-processes
    if args[CmdArgs.dry_run]:
        with absopen(args[CmdArgs.jobs_file], "w") as fp:
            dry_run(args, opt_file_lookup, run_uuid, fp)
    else:
        # A non-positive timeout on the command line means no timeout at all
        timeout = args[CmdArgs.timeout] if args[CmdArgs.timeout] > 0 else None
        real_run(args, opt_file_lookup, run_uuid, timeout)

    logger.info("done")


if __name__ == "__main__":
    main()  # pragma: main
# np seed must be in [0, 2**32 - 1] = [0, uint32 max]
SEED_MAX_INCL = np.iinfo(np.uint32).max

# Access default numpy rng in way that is short and sphinx friendly
random = np.random.random.__self__


def random_seed(random=random):
    """Draw a random seed that can be used to seed a :class:`numpy:numpy.random.RandomState`.

    Parameters
    ----------
    random : :class:`numpy:numpy.random.RandomState`
        Random stream the seed is drawn from. Anything with a compatible ``randint(low, high)`` method works.

    Returns
    -------
    seed : int
        Result of ``random.randint(0, SEED_MAX_INCL)``, where ``SEED_MAX_INCL`` is ``2**32 - 1``.
    """
    # numpy treats the upper limit as exclusive while python's randint treats it as inclusive. Passing the largest
    # valid seed as the limit is valid for both, at the price of numpy never returning that one value.
    return random.randint(0, SEED_MAX_INCL)


def shuffle_2d(X, random=random):
    """Shuffle 2D data in place: first the order of the rows, then the elements inside every row.

    This generalizes :func:`numpy:numpy.random.shuffle` and, like it, has no return value.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, m)
        Array-like 2D data to shuffle in place.
    random : :class:`numpy:numpy.random.RandomState`
        Random stream used for the shuffling.
    """
    random.shuffle(X)
    for row in X:
        random.shuffle(row)


def strat_split(X, n_splits, inplace=False, random=random):
    """Randomly split the items of `X` into groups, stratified by the rows of `X`.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, m)
        Data to split into groups. Every group should receive the same number +/-1 of elements from each row.
    n_splits : int
        Number of groups to create, must be positive.
    inplace : bool
        If true, `X` itself is shuffled. Otherwise, a copy is shuffled and `X` is left alone.
    random : :class:`numpy:numpy.random.RandomState`
        Random stream to use for reproducibility.

    Returns
    -------
    Y : list(:class:`numpy:numpy.ndarray`)
        The groups. This is a list of arrays since the groups may differ in length.
    """
    # Arguably, this function could go in stats
    assert np.ndim(X) == 2
    assert n_splits > 0

    work = X if inplace else np.array(X, copy=True)
    shuffle_2d(work, random=random)

    # Reading column by column (like work.T.ravel()) makes consecutive elements come from different rows
    column_major = np.ravel(work, order="F")
    groups = np.array_split(column_major, n_splits)

    # Just for good measure make sure this is shuffled too, prob not needed.
    shuffle_2d(groups, random=random)
    return groups


def isclose_lte(x, y):
    """Check element-wise that ``x <= y`` holds at least approximately.

    This relates to ``<=`` in the way :func:`numpy:numpy.isclose` relates to ``==``. Shapes of the inputs must be
    broadcast compatible.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Lower limit in ``<=`` check.
    y : :class:`numpy:numpy.ndarray`
        Upper limit in ``<=`` check.

    Returns
    -------
    lte : bool
        True where ``x <= y`` or where `x` and `y` are close according to :func:`numpy:numpy.isclose`.
    """
    # Going through numpy for the comparison as well ensures the result is always a numpy type
    return np.logical_or(np.less_equal(x, y), np.isclose(x, y))
def clip_chk(x, lb, ub, allow_nan=False):
    """Clip all element of `x` to be between `lb` and `ub` like :func:`numpy:numpy.clip`, but also check
    :func:`numpy:numpy.isclose`.

    Shapes of all input variables must be broadcast compatible.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Array containing elements to clip.
    lb : :class:`numpy:numpy.ndarray`
        Lower limit in clip.
    ub : :class:`numpy:numpy.ndarray`
        Upper limit in clip.
    allow_nan : bool
        If true, we allow ``nan`` to be present in `x` without raising an error.

    Returns
    -------
    x : :class:`numpy:numpy.ndarray`
        An array with the elements of `x`, but where values < `lb` are replaced with `lb`, and those > `ub` with `ub`.

    Raises
    ------
    AssertionError
        If `lb` exceeds `ub` somewhere, or if an element of `x` is outside the bounds by more than the tolerance
        of :func:`isclose_lte`.
    """
    assert np.all(lb <= ub)  # np.clip does not do this check

    x = np.asarray(x)

    # These are asserts not exceptions since clip_chk most used internally.
    if allow_nan:
        assert np.all(isclose_lte(lb, x) | np.isnan(x))
        assert np.all(isclose_lte(x, ub) | np.isnan(x))
    else:
        assert np.all(isclose_lte(lb, x))
        assert np.all(isclose_lte(x, ub))
    x = np.clip(x, lb, ub)
    return x


def snap_to(x, fixed_val=None):
    """Snap input `x` to the `fixed_val` unless `fixed_val` is `None`, where `x` is returned.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray`
        Array containing elements to snap.
    fixed_val : :class:`numpy:numpy.ndarray` or None
        Values to be returned if `x` is close, otherwise an error is raised. If `fixed_val` is `None`, `x` is returned.

    Returns
    -------
    fixed_val : :class:`numpy:numpy.ndarray`
        Snapped to value of `x`: `fixed_val` broadcast to the shape of `x`.

    Raises
    ------
    ValueError
        If `x` is neither equal nor close to `fixed_val`.
    """
    if fixed_val is None:
        return x

    # Include == for discrete types where allclose doesn't work
    if not (np.all(x == fixed_val) or np.allclose(x, fixed_val)):
        raise ValueError("Expected fixed value %s, got %s." % (repr(fixed_val), repr(x)))

    # No second check is needed here: reaching this line already means the condition above holds
    fixed_val = np.broadcast_to(fixed_val, np.shape(x))
    return fixed_val


def linear_rescale(X, lb0, ub0, lb1, ub1, enforce_bounds=True):
    """Linearly transform all elements of `X`, bounded between `lb0` and `ub0`, to be between `lb1` and `ub1`.

    Shapes of all input variables must be broadcast compatible.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray`
        Array containing elements to rescale.
    lb0 : :class:`numpy:numpy.ndarray`
        Current lower bound of `X`.
    ub0 : :class:`numpy:numpy.ndarray`
        Current upper bound of `X`.
    lb1 : :class:`numpy:numpy.ndarray`
        Desired lower bound of `X`.
    ub1 : :class:`numpy:numpy.ndarray`
        Desired upper bound of `X`.
    enforce_bounds : bool
        If True, perform input bounds check (and clipping if slight violation) on the input `X` and again on the
        output. This argument is not meant to be vectorized like the other input variables.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray`
        Elements of input `X` after linear rescaling.
    """
    assert np.all(np.isfinite(lb0))
    assert np.all(np.isfinite(lb1))
    assert np.all(np.isfinite(ub0))
    assert np.all(np.isfinite(ub1))
    # The input range must be non-degenerate since we divide by its width; the output range may be a single point
    assert np.all(lb0 < ub0)
    assert np.all(lb1 <= ub1)

    m = np.true_divide(ub1 - lb1, ub0 - lb0)
    assert np.all(m >= 0)

    if enforce_bounds:
        X = clip_chk(X, lb0, ub0)  # This will flag any non-finite X input.
        X = clip_chk(m * (X - lb0) + lb1, lb1, ub1)
    else:
        X = m * (X - lb0) + lb1
    return X


def argmin_2d(X):
    """Take the arg minimum of a 2D array.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, m)
        Non-empty 2D data. Array-likes such as nested lists are accepted as well.

    Returns
    -------
    ii : int
        Row index of the minimum.
    jj : int
        Column index of the minimum.
    """
    # Coerce first so that array-likes work too, the same way clip_chk treats its input
    X = np.asarray(X)
    assert X.size > 0, "argmin of empty array not defined"

    # The unpacking into exactly two indices also enforces that the input is 2D
    ii, jj = np.unravel_index(X.argmin(), X.shape)
    return ii, jj


def cummin(x_val, x_key):
    """Get the cumulative minimum of `x_val` when ranked according to `x_key`.

    Parameters
    ----------
    x_val : :class:`numpy:numpy.ndarray` of shape (n, d)
        The array to get the cumulative minimum of along axis 0.
    x_key : :class:`numpy:numpy.ndarray` of shape (n, d)
        The array for ranking elements as to what is the minimum.

    Returns
    -------
    c_min : :class:`numpy:numpy.ndarray` of shape (n, d)
        The cumulative minimum array.
    """
    assert x_val.shape == x_key.shape
    assert x_val.ndim == 2
    assert not np.any(np.isnan(x_key)), "cummin not defined for nan key"

    n, _ = x_val.shape

    # Running minimum of the key down each column
    xm = np.minimum.accumulate(x_key, axis=0)
    # Row index of the most recent position where the key attained the running minimum
    idx = np.maximum.accumulate((x_key <= xm) * np.arange(n)[:, None])
    c_min = np.take_along_axis(x_val, idx, axis=0)
    return c_min
def abspath(path, verify=True):  # pragma: io
    """Expand the user directory in `path` and make it absolute, optionally checking that the directory exists.

    This combines :func:`os.path.abspath` and :func:`os.path.expanduser`.

    Parameters
    ----------
    path : str
        Relative path string that can also contain home directories, e.g., ``"~/git/"``.
    verify : bool
        If true, an assertion failure is raised when the resulting path is not an existing directory.

    Returns
    -------
    path : str
        Absolute version of input path.
    """
    expanded = os.path.expanduser(path)
    path = os.path.abspath(expanded)
    if verify:
        assert os.path.isdir(path), "directory does not exist: %s" % path
    return path


def absopen(path, mode):  # pragma: io
    """Variant of the built in :func:`open` that refuses anything but absolute paths.

    Parameters
    ----------
    path : str
        Absolute path. An assertion failure is raised if it is not absolute.
    mode : str
        Open mode, any mode understood by the built in :func:`open`, e.g., ``"r"`` or ``"w"``.

    Returns
    -------
    f : file handle
        The opened file. Closing it is up to the caller.
    """
    assert os.path.isabs(path), "Only allowing opening of absolute paths for safety."
    return open(path, mode)


def _join_safe(*args):  # pragma: io
    """Shared part of `join_safe_r` and `join_safe_w`: join directory parts and a file name into an absolute path.

    All arguments but the last form the directory, which must exist. The last argument must be a bare file name.
    """
    assert len(args) >= 2
    *dir_parts, fname = args

    # Make sure dir is abs, and exists
    directory = abspath(os.path.join(*dir_parts), verify=True)

    assert os.path.basename(fname) == fname, "Expected basename got %s" % fname
    # The directory is absolute, so the result is too. Could check abs again if really wanted to be safe
    return os.path.join(directory, fname)


def join_safe_r(*args):  # pragma: io
    """Version of :func:`os.path.join` for reading: the result is absolute and must be an existing file.

    Parameters
    ----------
    *args : str
        varargs for parts of path to combine. The last argument must be a file name.

    Returns
    -------
    fname : str
        Absolute path to filename.
    """
    fname = _join_safe(*args)
    file_exists = os.path.isfile(fname)
    assert file_exists
    return fname


def join_safe_w(*args):  # pragma: io
    """Version of :func:`os.path.join` for writing: the result is absolute and a warning flags existing files.

    Parameters
    ----------
    *args : str
        varargs for parts of path to combine. The last argument must be a file name.

    Returns
    -------
    fname : str
        Absolute path to filename.
    """
    fname = _join_safe(*args)
    already_there = os.path.isfile(fname)
    if already_there:
        # Only a warning, overwriting is allowed
        warnings.warn("file already exists: %s" % fname, RuntimeWarning)
    return fname
def ensure_shape(x, y):
    """Util to broadcast one variable to the shape of another, but only when the shapes differ.

    This way we don't convert a scalar into an array type unnecessarily.

    Parameters
    ----------
    x : scalar or :class:`numpy:numpy.ndarray`
        Value to broadcast.
    y : scalar or :class:`numpy:numpy.ndarray`
        Value whose shape is the target shape.

    Returns
    -------
    x : scalar or :class:`numpy:numpy.ndarray`
        `x` itself when it already has the shape of `y`, otherwise the result of broadcasting `x` to that shape.
    """
    shape_y = np.shape(y)
    if np.shape(x) == shape_y:
        return x
    return np.broadcast_to(x, shape_y)


def order_stats(X):
    """Compute order statistics on sample `X`.

    Follows convention that order statistic 1 is minimum and statistic n is maximum. Therefore, array elements ``0``
    and ``n+1`` are ``-inf`` and ``+inf``.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for order statistics. Can be vectorized. Must be sortable data type (which is almost everything).

    Returns
    -------
    o_stats : :class:`numpy:numpy.ndarray` of shape (n+2,)
        Order statistics on `X`.
    """
    assert np.ndim(X) >= 1
    # NaN is not allowed since it does not have well defined order.
    assert not np.any(np.isnan(X))

    X_shape = np.shape(X)
    # One pad element per vectorized sample, placed on the last (sample) axis
    inf_pad = np.full(X_shape[:-1] + (1,), np.inf)
    o_stats = np.concatenate((-inf_pad, np.sort(X, axis=-1), inf_pad), axis=-1)
    return o_stats


def _quantile(n, q):
    """Index into the padded order statistics (see :func:`order_stats`) of the empirical `q` quantile for `n` samples.
    """
    idx = np.ceil(n * q).astype(int)
    return idx


def quantile(X, q):
    """Computes `q` th quantile of `X`.

    Similar to :func:`numpy:numpy.percentile` except that it matches the mathematical definition of a quantile *and*
    `q` is scaled in (0,1) rather than (0,100).

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Empirical `q` quantile from sample `X`.
    """
    assert np.ndim(X) >= 1
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Currently don't support broadcasting both at same time
    assert np.ndim(X) == 1 or np.ndim(q) == 0

    # np.shape (not X.shape) so that array-likes accepted by the asserts above also work here
    n = np.shape(X)[-1]
    idx = _quantile(n, q)

    o_stats = order_stats(X)
    estimate = o_stats[..., idx]
    return estimate


def _quantile_CI(n, q, alpha):
    """Indices into the padded order statistics that bound a CI on the `q` quantile for `n` samples.

    The indices come from the binomial distribution with `n` trials and success probability `q`. The asserts verify
    that each tail holds at most ``alpha / 2`` mass and that the covered mass is at least ``1 - alpha``.

    Returns
    -------
    idx_lower : int
        Index of the order statistic for the lower end of the CI.
    idx_upper : int
        Index of the order statistic for the upper end of the CI.
    """
    # Use in case there is -inf case from being at extreme of distn
    idx_lower = np.fmax(0, ss.binom.ppf(alpha / 2.0, n, q)).astype(int)
    assert np.all(isclose_lte(ss.binom.cdf(idx_lower - 1, n, q), alpha / 2.0))
    assert np.all(isclose_lte(alpha / 2.0, ss.binom.cdf(idx_lower, n, q)))
    assert np.all(0 <= idx_lower) and np.all(idx_lower <= n + 1)

    idx_upper = np.fmax(0, ss.binom.isf(alpha / 2.0, n, q)).astype(int) + 1
    assert np.all(isclose_lte(ss.binom.sf(idx_upper - 1, n, q), alpha / 2.0))
    assert np.all(isclose_lte(alpha / 2.0, ss.binom.sf(idx_upper - 2, n, q)))
    assert np.all(isclose_lte(1 - (alpha / 2.0), ss.binom.cdf(idx_upper - 1, n, q)))
    assert np.all(isclose_lte(ss.binom.cdf(idx_upper - 2, n, q), 1 - (alpha / 2.0)))
    assert np.all(0 <= idx_upper) and np.all(idx_upper <= n + 1)

    C = ss.binom.cdf(idx_upper - 1, n, q) - ss.binom.cdf(idx_lower - 1, n, q)
    assert np.all(isclose_lte(1.0 - alpha, C))
    return idx_lower, idx_upper


def quantile_CI(X, q, alpha=0.05):
    """Calculate CI on `q` quantile from same `X` using nonparametric estimation from order statistics.

    This will have alpha level of at most `alpha` due to the discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    assert np.ndim(X) >= 1
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    assert np.all(0 < alpha) and np.all(alpha < 1)
    # Currently don't support broadcasting both at same time
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(alpha) == 0)

    n = np.shape(X)[-1]
    idx_lower, idx_upper = _quantile_CI(n, q, alpha)

    o_stats = order_stats(X)
    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]
    return LB, UB


def max_quantile_CI(X, q, m, alpha=0.05):
    """Calculate CI on `q` quantile of distribution on max of `m` iid samples using a data set `X`.

    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the
    discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    m : int
        Compute statistics for distribution on max over `m` samples. Must be ``>= 1``. Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Best estimate on `q` quantile on max over `m` iid samples.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # X is checked below in order_stats.
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Could check int but if someone wants to interpolate, we will let them.
    assert np.all(m >= 1)
    # _quantile_CI does not range check alpha itself, so do it here
    assert np.all(0 < alpha) and np.all(alpha < 1)
    # Currently don't support broadcasting both at same time: q, m, and alpha must all be scalar for vectorized X
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(m) == 0 and np.ndim(alpha) == 0)

    # The max of m iid samples is <= x iff all m are, so its q quantile is the q ** (1 / m) quantile of one sample
    q = q ** (1.0 / m)

    o_stats = order_stats(X)
    n = np.shape(X)[-1]

    idx = _quantile(n, q)
    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)

    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]
    # Might need to broadcast estimate out if vectorization is in alpha
    estimate = ensure_shape(o_stats[..., idx], LB)
    return estimate, LB, UB


def min_quantile_CI(X, q, m, alpha=0.05):
    """Calculate confidence interval on `q` quantile of distribution on min of `m` iid samples using a data set `X`.

    This uses nonparametric estimation from order statistics and will have alpha level of at most `alpha` due to the
    discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    m : int
        Compute statistics for distribution on min over `m` samples. Must be ``>= 1``. Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Best estimate on `q` quantile on min over `m` iid samples.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # X is checked below in order_stats.
    # We could robustify things to allow the edge cases, but maybe later
    assert np.all(0 < q) and np.all(q < 1)
    # Could check int but if someone wants to interp, we will let them.
    assert np.all(m >= 1)
    # _quantile_CI does not range check alpha itself, so do it here
    assert np.all(0 < alpha) and np.all(alpha < 1)
    # Currently don't support broadcasting both at same time: q, m, and alpha must all be scalar for vectorized X
    assert np.ndim(X) == 1 or (np.ndim(q) == 0 and np.ndim(m) == 0 and np.ndim(alpha) == 0)

    # This might have numerics issues for small q
    q = 1.0 - (1.0 - q) ** (1.0 / m)

    o_stats = order_stats(X)
    n = np.shape(X)[-1]

    idx = _quantile(n, q)
    idx_lower, idx_upper = _quantile_CI(n, q, alpha=alpha)

    LB, UB = o_stats[..., idx_lower], o_stats[..., idx_upper]
    # Might need to broadcast estimate out if vectorization is in alpha
    estimate = ensure_shape(o_stats[..., idx], LB)
    return estimate, LB, UB


def quantile_and_CI(X, q, alpha=0.05):
    """Calculate CI on `q` quantile from same `X` using nonparametric estimation from order statistics.

    This will have alpha level of at most `alpha` due to the discrete nature of order statistics.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n,)
        Data for quantile estimation. Can be vectorized. Must be sortable data type (which is almost everything).
    q : float
        Quantile to compute, must be in (0, 1). Can be vectorized.
    alpha : float
        False positive rate we allow for CI, must be in (0, 1). Can be vectorized.

    Returns
    -------
    estimate : dtype of `X`, scalar
        Empirical `q` quantile from sample `X`.
    LB : dtype of `X`, scalar
        Lower end on CI
    UB : dtype of `X`, scalar
        Upper end on CI
    """
    # This routine is mostly just a wrapper routine: the max over m=1 samples is the sample itself
    estimate, LB, UB = max_quantile_CI(X, q=q, m=1, alpha=alpha)
    return estimate, LB, UB
def suggest_dict(X, y, meta, n_suggestions=1, random=np_util.random):
    """Stateless function to create suggestions for next query point in random search optimization.

    This implements the API for general structures of different data types.

    Parameters
    ----------
    X : list(dict)
        Places where the objective function has already been evaluated. Not actually used in random search.
    y : :class:`numpy:numpy.ndarray`, shape (n,)
        Corresponding values where objective has been evaluated. Not actually used in random search.
    meta : dict(str, dict)
        Configuration of the optimization variables. See API description.
    n_suggestions : int
        Desired number of parallel suggestions in the output
    random : :class:`numpy:numpy.random.RandomState`
        Optionally pass in random stream for reproducibility.

    Returns
    -------
    next_guess : list(dict)
        List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where
        each key corresponds to a parameter being optimized.
    """
    # Move into the warped space, where the search box is defined
    joint_space = JointSpace(meta)
    warped_X = joint_space.warp(X)
    warped_bounds = joint_space.get_bounds()

    # The history is only validated; random search does not use it to pick points
    _, n_params = _check_x_y(warped_X, y, allow_impute=True)
    lower, upper = _check_bounds(warped_bounds, n_params)

    # Draw uniformly inside the box, one row per suggestion
    draws = random.uniform(lower, upper, size=(n_suggestions, n_params))

    # Map the draws back to the original parameter types
    return joint_space.unwarp(draws)


def _check_x_y(X, y, allow_impute=False):  # pragma: validator
    """Input validation for `suggest` routine.

    Returns the number of usable observations and the number of parameters. With `allow_impute`, NaN entries in `y`
    are tolerated and not counted as usable observations.
    """
    if np.ndim(X) != 2:
        raise ValueError("X must be 2-dimensional got %s." % str(np.shape(X)))
    n_obs, n_params = np.shape(X)
    assert n_params >= 1, "We do not support suggest on empty space."

    if np.shape(y) != (n_obs,):
        raise ValueError("y must be %s not %s." % (str((n_obs,)), str(np.shape(y))))

    if not np.all(np.isfinite(X)):
        raise ValueError("X must be finite.")

    if not allow_impute:
        if not np.all(np.isfinite(y)):
            raise ValueError("y must be finite when data imputation not used.")
        return n_obs, n_params

    if not np.all(np.isfinite(y) | np.isnan(y)):
        raise ValueError("y can't contain infs even with data imputation.")
    return np.sum(np.isfinite(y)), n_params


def _check_bounds(bounds, n_params):  # pragma: validator
    """Input validation for `suggest` routine.

    Returns the lower and upper bounds as two separate arrays with one element per parameter.
    """
    expected_shape = (n_params, 2)
    if np.shape(bounds) != expected_shape:
        raise ValueError("bounds must have shape %s not %s." % (str(expected_shape), str(np.shape(bounds))))

    lb, ub = np.asarray(bounds).T
    all_finite = np.all(np.isfinite(lb)) and np.all(np.isfinite(ub))
    if not all_finite:
        raise ValueError("bounds must be finite.")
    if not np.all(lb <= ub):
        raise ValueError("lower bound must be less than upper bound.")
    return lb, ub
class Serializer(ABC):
    """Abstract base class for the serialization abstraction layer.
    """

    @staticmethod
    @abstractmethod
    def init_db(db_root, keys, db=None, exist_ok=True):
        """Initialize a "database" for storing data at the specified location.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data.
        db : str
            The name of the database. If ``None``, a non-conflicting name will be generated.
        exist_ok : bool
            If true, do not raise an error if this database already exists.

        Returns
        -------
        db : str
            The name of the database.
        """
        pass

    @staticmethod
    @abstractmethod
    def get_keys(db_root, db):
        """List the non-derived keys available in the database.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for non-derived data.
        """
        pass

    @staticmethod
    @abstractmethod
    def get_derived_keys(db_root, db):
        """List the derived keys currently available in the database.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.

        Returns
        -------
        keys : list(str)
            The variable names (or keys) in the database for derived data.
        """
        pass

    @staticmethod
    @abstractmethod
    def get_uuids(db_root, db, key):
        """List the UUIDs for the versions of a variable (non-derived key) available in the database.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for non-derived data.

        Returns
        -------
        uuids : list(uuid.UUID)
            The UUIDs for the versions of this key.
        """
        pass

    @staticmethod
    @abstractmethod
    def save(data, meta, db_root, db, key, uuid_):
        """Abstract method for saving experimental data, details require the type of `data`.
        """
        pass

    @staticmethod
    @abstractmethod
    def load(db_root, db, key, uuid_):
        """Abstract method for loading experimental data, details require the type of `data`.
        """
        pass

    @staticmethod
    @abstractmethod
    def save_derived(data, meta, db_root, db, key):
        """Abstract method for saving derived data, details require the type of `data`.
        """
        pass

    @staticmethod
    @abstractmethod
    def load_derived(db_root, db, key):
        """Abstract method for loading derived data, details require the type of `data`.
        """
        pass


class XRSerializer(Serializer):
    """Serialization layer when saving and loading `xarray` datasets (currently) as `json`.

    Every method is a static method, matching the declarations in :class:`Serializer`, so the methods behave the same
    whether they are called on the class or on an instance.
    """

    @staticmethod
    def init_db(db_root, keys, db=None, exist_ok=True):  # pragma: io
        """Create the folder structure of a database, see :func:`Serializer.init_db`.

        The database is a folder under `db_root` with one sub-folder per key plus the derived and logging folders.
        """
        XRSerializer._validate(db_root, keys, db)

        if db is None:
            # mkdtemp both picks a non-conflicting name and creates the folder
            folder_prefix = datetime.utcnow().strftime(PREFIX_FMT)
            exp_subdir = mkdtemp(prefix=folder_prefix, dir=db_root)
            db = os.path.basename(exp_subdir)
            assert db.startswith(folder_prefix)
            assert os.path.join(db_root, db) == exp_subdir
        else:
            exp_subdir = os.path.join(db_root, db)
            os.makedirs(exp_subdir, exist_ok=exist_ok)

        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)
        for subd in subdirs:
            os.makedirs(os.path.join(exp_subdir, subd), exist_ok=exist_ok)

        return db

    @staticmethod
    def init_db_manual(db_root, keys, db):
        """Instruction for how one would manually initialize the "database" on another system.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data.
        db : str
            The name of the database.

        Returns
        -------
        manual_setup_info : str
            The setup instructions.
        """
        XRSerializer._validate(db_root, keys, db)
        assert db is not None, "Must specify db name to setup manually."

        exp_subdir = os.path.join(db_root, db)
        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)

        manual_setup_info = _SETUP_STR % (exp_subdir, str_join_safe(" ", subdirs))
        return manual_setup_info

    @staticmethod
    def get_keys(db_root, db):  # pragma: io
        """List the non-derived keys, see :func:`Serializer.get_keys`.

        The keys are the sorted entries of the database folder without the derived and logging folders.
        """
        XRSerializer._validate(db_root, keys=(), db=db)

        keys = sorted(os.listdir(os.path.join(db_root, db)))
        keys.remove(_DERIVED_DIR)
        keys.remove(_LOGGING_DIR)
        return keys

    @staticmethod
    def get_derived_keys(db_root, db):  # pragma: io
        """List the derived keys, see :func:`Serializer.get_derived_keys`.
        """
        XRSerializer._validate(db_root, keys=(), db=db)

        fnames = sorted(os.listdir(os.path.join(db_root, db, _DERIVED_DIR)))
        keys = [XRSerializer._fname_to_key(ff) for ff in fnames]
        return keys

    @staticmethod
    def get_uuids(db_root, db, key):  # pragma: io
        """List the UUIDs stored under `key`, see :func:`Serializer.get_uuids`.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fnames = sorted(os.listdir(os.path.join(db_root, db, key)))
        uuids = [XRSerializer._fname_to_uuid(ff) for ff in fnames]
        return uuids

    @staticmethod
    def save(data, meta, db_root, db, key, uuid_):  # pragma: io
        """Save a dataset under a key name in the database.

        Parameters
        ----------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable we would like to store as non-derived data from an experiment.
        meta : json-serializable
            Associated meta-data with the experiment. This can be anything json serializable.
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        uuid_ : uuid.UUID
            The UUID to represent the version of this variable we are storing.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._uuid_to_fname(uuid_)
        path = (db_root, db, key, fname)
        with open(join_safe_w(*path), "w") as f:
            _dump_xr(f, ds=data, meta=meta)

    @staticmethod
    def load(db_root, db, key, uuid_):  # pragma: io
        """Load a dataset under a key name in the database. This is the inverse of :func:`.save`.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        uuid_ : uuid.UUID
            The UUID to represent the version of this variable we want to load.

        Returns
        -------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable for the non-derived data from an experiment.
        meta : json-serializable
            Associated meta-data with the experiment. This can be anything json serializable.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._uuid_to_fname(uuid_)
        path = (db_root, db, key, fname)
        with open(join_safe_r(*path), "r") as f:
            ds, meta = _load_xr(f)
        return ds, meta

    @staticmethod
    def save_derived(data, meta, db_root, db, key):  # pragma: io
        """Save a dataset under a key name in the database as derived data.

        Parameters
        ----------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable we would like to store as derived data from experiments.
        meta : json-serializable
            Associated meta-data with the experiments. This can be anything json serializable.
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._key_to_fname(key)
        path = (db_root, db, _DERIVED_DIR, fname)
        with open(join_safe_w(*path), "w") as f:
            _dump_xr(f, ds=data, meta=meta)

    @staticmethod
    def load_derived(db_root, db, key):  # pragma: io
        """Load a dataset under a key name in the database as derived data. This is the inverse of
        :func:`.save_derived`.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        key : str
            The variable name in the database for the data.

        Returns
        -------
        data : :class:`xarray:xarray.Dataset`
            An :class:`xarray:xarray.Dataset` variable for the derived data from experiments.
        meta : json-serializable
            Associated meta-data with the experiments. This can be anything json serializable.
        """
        XRSerializer._validate(db_root, keys=[key], db=db)

        fname = XRSerializer._key_to_fname(key)
        path = (db_root, db, _DERIVED_DIR, fname)
        with open(join_safe_r(*path), "r") as f:
            data, meta = _load_xr(f)
        return data, meta

    @staticmethod
    def logging_path(db_root, db, uuid_):  # pragma: io
        """Get an absolute path for logging from an experiment given its UUID.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        db : str
            The name of the database.
        uuid_ : uuid.UUID
            The UUID to represent this experiment.

        Returns
        -------
        logfile : str
            Absolute path suitable for logging in this experiment.
        """
        XRSerializer._validate(db_root, keys=(), db=db)
        assert isinstance(uuid_, uuid.UUID)

        fname = uuid_.hex + _LOG_EXT
        logfile = join_safe_w(db_root, db, _LOGGING_DIR, fname)
        return logfile

    @staticmethod
    def _fname_to_uuid(fname):
        """Recover the UUID from the name of a file written by :func:`.save`."""
        uuid_ = uuid.UUID(chomp(fname, _XR_EXT))
        return uuid_

    @staticmethod
    def _uuid_to_fname(uuid_):
        """Build the file name used to store the version `uuid_` of a variable."""
        assert isinstance(uuid_, uuid.UUID)  # This can be eliminated once we use type hints
        fname = uuid_.hex + _XR_EXT
        return fname

    @staticmethod
    def _key_to_fname(key):
        """Build the file name used to store the derived variable `key`."""
        fname = key + _XR_EXT
        return fname

    @staticmethod
    def _fname_to_key(fname):
        """Recover the key from the name of a file written by :func:`.save_derived`."""
        key = chomp(fname, _XR_EXT)
        return key

    @staticmethod
    def _validate(db_root, keys=(), db=None):
        """Check that `db_root` is a valid absolute path, and that `db` and each of `keys` are valid file names."""
        validate_filepath(db_root, platform="auto")
        assert os.path.isabs(db_root), "db_root must be absolute path"

        if db is not None:
            validate_filename(db, platform="universal")

        for kk in keys:
            validate_filename(kk, platform="universal")


def _dump_xr(f, ds, meta):  # pragma: io
    """Helper routine to `XRSerializer.save` and `XRSerializer.save_derived`.

    Writes a single json object to the open file `f`, with the meta-data on the first line and the data on the
    second line.
    """
    assert isinstance(ds, xr.Dataset)  # Requiring Dataset and not DataArray for now

    meta_json = json.dumps(meta)  # meta can be anything that json can handle
    # JSON dumps seems pretty good about escaping, but check to be sure
    assert NEWLINE not in meta_json

    # Built in json dumper doesn't allow us to only line break on top-level, so we manually do this for now
    f.write('{"meta": %s,' % meta_json)
    f.write(NEWLINE)
    f.write('"data": ')
    json.dump(ds.to_dict(), f)
    f.write("}")
    f.write(NEWLINE)


def _load_xr(f):  # pragma: io
    """Helper routine to `XRSerializer.load` and `XRSerializer.load_derived`.

    Reads back, from the open file `f`, the dataset and meta-data written by `_dump_xr`.
    """
    all_json = json.load(f)
    meta = all_json.pop("meta")
    ds = xr.Dataset.from_dict(all_json.pop("data"))
    return ds, meta
def analyze_signatures(signatures):
    """Analyze function signatures from the experiment.

    A warning is raised for every test case whose signatures are not all close to each other.

    Parameters
    ----------
    signatures : dict(str, list(list(float)))
        The signatures should all be the same length, so it should be 2D array like.

    Returns
    -------
    sig_errs : :class:`pandas:pandas.DataFrame`
        rows are test cases, columns are test points. A ``"max"`` row and a ``"max"`` column hold the margins.
    signatures_median : dict(str, list(float))
        Median signature across all repetition per test case.
    """
    spread = {}
    signatures_median = {}
    for test_case, signature_y in signatures.items():
        assert len(signature_y) > 0, "signature with no cases found"
        assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"

        # Extremes over the repetitions, one value per test point
        lowest = np.min(signature_y, axis=0)
        highest = np.max(signature_y, axis=0)

        if not np.allclose(lowest, highest):
            # Arguably, the util should not raise the warning, and these should
            # be raised on the outside, but let's do this for simplicity.
            warnings.warn(
                "Signature diverged on %s betwen %s and %s" % (test_case, str(lowest), str(highest)), RuntimeWarning
            )

        spread[test_case] = highest - lowest
        # ensure serializable using tolist
        signatures_median[test_case] = np.median(signature_y, axis=0).tolist()

    # Convert to pandas so easy to append margins with max, better for disp.
    # If we let the user convert to pandas then we don't need dep on pandas.
    sig_errs = pd.DataFrame(spread).T
    sig_errs.loc["max", :] = sig_errs.max(axis=0)
    sig_errs.loc[:, "max"] = sig_errs.max(axis=1)

    return sig_errs, signatures_median


def analyze_signature_pair(signatures, signatures_ref):
    """Analyze a pair of signatures (often from two sets of experiments) and return the error between them.

    Parameters
    ----------
    signatures : dict(str, list(float))
        Signatures from set of experiments. The signatures must all be the same length, so it should be 2D array
        like.
    signatures_ref : dict(str, list(float))
        The signatures from a reference set of experiments. The keys in `signatures` must be a subset of the
        signatures in `signatures_ref`.

    Returns
    -------
    sig_errs : :class:`pandas:pandas.DataFrame`
        rows are test cases, columns are test points.
    signatures_median : dict(str, list(float))
        Median signature across all repetition per test case.
    """
    # Treat the two sets as two repetitions of each test case found in `signatures`
    paired = {}
    for kk in signatures:
        paired[kk] = [signatures[kk], signatures_ref[kk]]
    return analyze_signatures(paired)
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Routines to build a standardized interface to make `sklearn` hyper-parameter tuning problems look like an objective function. This file mostly contains a dictionary collection of all sklearn test funcs. The format of each element in `MODELS` is: model_name: (model_class, fixed_param_dict, search_param_api_dict) `model_name` is an arbitrary name to refer to a certain strategy. At usage time, the model instance is created using: ``model_class(**kwarg_dict)`` The kwarg dict is `fixed_param_dict` + `search_param_dict`. The `search_param_dict` comes from an optimizer which is configured using the `search_param_api_dict`. See the API description for information on setting up the `search_param_api_dict`.
""" import os.path import pickle as pkl import warnings from abc import ABC, abstractmethod import numpy as np from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor, RandomForestClassifier, RandomForestRegressor from sklearn.linear_model import Lasso, LogisticRegression, Ridge from sklearn.metrics import get_scorer from sklearn.model_selection import cross_val_score, train_test_split from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.svm import SVC, SVR from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from bayesmark.constants import ARG_DELIM, METRICS, MODEL_NAMES, VISIBLE_TO_OPT from bayesmark.data import METRICS_LOOKUP, ProblemType, get_problem_type, load_data from bayesmark.path_util import absopen from bayesmark.space import JointSpace from bayesmark.util import str_join_safe # Using 3 would be faster, but 5 is the most realistic CV split (5-fold) CV_SPLITS = 5 # We should add cat variables into some of these configurations but a lot of # the wrappers for the BO methods really have trouble with cat types. 
# Search spaces for the tuned hyper-parameters of each model. Every entry maps
# a parameter name to its API config: the variable "type", the warping "space",
# and the (lower, upper) "range" to search over.

# kNN
knn_cfg = {
    "n_neighbors": {"type": "int", "space": "linear", "range": (1, 25)},
    "p": {"type": "int", "space": "linear", "range": (1, 4)},
}

# SVM
svm_cfg = {
    "C": {"type": "real", "space": "log", "range": (1.0, 1e3)},
    "gamma": {"type": "real", "space": "log", "range": (1e-4, 1e-3)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
}

# DT
dt_cfg = {
    "max_depth": {"type": "int", "space": "linear", "range": (1, 15)},
    "min_samples_split": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_weight_fraction_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "max_features": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_impurity_decrease": {"type": "real", "space": "linear", "range": (0.0, 0.5)},
}

# RF
rf_cfg = {
    "max_depth": {"type": "int", "space": "linear", "range": (1, 15)},
    "max_features": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_split": {"type": "real", "space": "logit", "range": (0.01, 0.99)},
    "min_samples_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_weight_fraction_leaf": {"type": "real", "space": "logit", "range": (0.01, 0.49)},
    "min_impurity_decrease": {"type": "real", "space": "linear", "range": (0.0, 0.5)},
}

# MLP with ADAM
mlp_adam_cfg = {
    "hidden_layer_sizes": {"type": "int", "space": "linear", "range": (50, 200)},
    "alpha": {"type": "real", "space": "log", "range": (1e-5, 1e1)},
    "batch_size": {"type": "int", "space": "linear", "range": (10, 250)},
    "learning_rate_init": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "validation_fraction": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
    "beta_1": {"type": "real", "space": "logit", "range": (0.5, 0.99)},
    "beta_2": {"type": "real", "space": "logit", "range": (0.9, 1.0 - 1e-6)},
    "epsilon": {"type": "real", "space": "log", "range": (1e-9, 1e-6)},
}

# MLP with SGD
mlp_sgd_cfg = {
    "hidden_layer_sizes": {"type": "int", "space": "linear", "range": (50, 200)},
    "alpha": {"type": "real", "space": "log", "range": (1e-5, 1e1)},
    "batch_size": {"type": "int", "space": "linear", "range": (10, 250)},
    "learning_rate_init": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "power_t": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
    "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)},
    "momentum": {"type": "real", "space": "logit", "range": (0.001, 0.999)},
    "validation_fraction": {"type": "real", "space": "logit", "range": (0.1, 0.9)},
}

# AdaBoostClassifier
ada_cfg = {
    "n_estimators": {"type": "int", "space": "linear", "range": (10, 100)},
    "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e1)},
}

# lasso
lasso_cfg = {
    "C": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "intercept_scaling": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
}

# linear (same search space as lasso; the two differ in the fixed "penalty" given in MODELS_CLF)
linear_cfg = {
    "C": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
    "intercept_scaling": {"type": "real", "space": "log", "range": (1e-2, 1e2)},
}

# Classification models as: model_name -> (model_class, fixed_param_dict, search_param_api_dict)
MODELS_CLF = {
    "kNN": (KNeighborsClassifier, {}, knn_cfg),
    "SVM": (SVC, {"kernel": "rbf", "probability": True}, svm_cfg),
    "DT": (DecisionTreeClassifier, {"max_leaf_nodes": None}, dt_cfg),
    "RF": (RandomForestClassifier, {"n_estimators": 10, "max_leaf_nodes": None}, rf_cfg),
    "MLP-adam": (MLPClassifier, {"solver": "adam", "early_stopping": True}, mlp_adam_cfg),
    "MLP-sgd": (
        MLPClassifier,
        {"solver": "sgd", "early_stopping": True, "learning_rate": "invscaling", "nesterovs_momentum": True},
        mlp_sgd_cfg,
    ),
    "ada": (AdaBoostClassifier, {}, ada_cfg),
    "lasso": (
        LogisticRegression,
        {"penalty": "l1", "fit_intercept": True, "solver": "liblinear", "multi_class": "ovr"},
        lasso_cfg,
    ),
    "linear": (
        LogisticRegression,
        {"penalty": "l2", "fit_intercept": True, "solver": "liblinear", "multi_class": "ovr"},
        linear_cfg,
    ),
}
thru all classifiers assert sorted(MODELS_CLF.keys()) == sorted(MODEL_NAMES) ada_cfg_reg = { "n_estimators": {"type": "int", "space": "linear", "range": (10, 100)}, "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e1)}, } lasso_cfg_reg = { "alpha": {"type": "real", "space": "log", "range": (1e-2, 1e2)}, "fit_intercept": {"type": "bool"}, "normalize": {"type": "bool"}, "max_iter": {"type": "int", "space": "log", "range": (10, 5000)}, "tol": {"type": "real", "space": "log", "range": (1e-5, 1e-1)}, "positive": {"type": "bool"}, } linear_cfg_reg = { "alpha": {"type": "real", "space": "log", "range": (1e-2, 1e2)}, "fit_intercept": {"type": "bool"}, "normalize": {"type": "bool"}, "max_iter": {"type": "int", "space": "log", "range": (10, 5000)}, "tol": {"type": "real", "space": "log", "range": (1e-4, 1e-1)}, } MODELS_REG = { "kNN": (KNeighborsRegressor, {}, knn_cfg), "SVM": (SVR, {"kernel": "rbf"}, svm_cfg), "DT": (DecisionTreeRegressor, {"max_leaf_nodes": None}, dt_cfg), "RF": (RandomForestRegressor, {"n_estimators": 10, "max_leaf_nodes": None}, rf_cfg), "MLP-adam": (MLPRegressor, {"solver": "adam", "early_stopping": True}, mlp_adam_cfg), "MLP-sgd": ( MLPRegressor, # regression crashes often with relu { "activation": "tanh", "solver": "sgd", "early_stopping": True, "learning_rate": "invscaling", "nesterovs_momentum": True, }, mlp_sgd_cfg, ), "ada": (AdaBoostRegressor, {}, ada_cfg_reg), "lasso": (Lasso, {}, lasso_cfg_reg), "linear": (Ridge, {"solver": "auto"}, linear_cfg_reg), } # If both classifiers and regressors match MODEL_NAMES then the experiment # launcher can simply go thru the cartesian product and do all combos. assert sorted(MODELS_REG.keys()) == sorted(MODEL_NAMES) class TestFunction(ABC): """Abstract base class for test functions in the benchmark. These do not need to be ML hyper-parameter tuning. """ def __init__(self): """Setup general test function for benchmark. 
def __init__(self, model, dataset, metric, shuffle_seed=0, data_root=None):
    """Build class that wraps sklearn classifier/regressor CV score for use as an objective function.

    Parameters
    ----------
    model : str
        Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is
        classification or regression.
    dataset : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
    metric : str
        Short name of the scoring metric. It must be in `METRICS`, be listed in `METRICS_LOOKUP` for the
        problem type of the data set, and be a key of `_METRIC_MAP`, which maps it to the sklearn scorer name.
    shuffle_seed : int
        Random seed to use when splitting the data into train and validation in the cross-validation splits.
        This is needed in order to keep the split constant across calls. Otherwise there would be extra noise
        in the objective function for varying splits.
    data_root : str
        Root directory to look for all custom csv files.
    """
    TestFunction.__init__(self)
    data, target, problem_type = load_data(dataset, data_root=data_root)
    assert problem_type in (ProblemType.clf, ProblemType.reg)
    self.is_classifier = problem_type == ProblemType.clf

    # Do some validation on loaded data.
    # np.float64 is spelled out because the np.float_ alias no longer exists in NumPy >= 2.0; on older
    # NumPy versions both names refer to the same type, so the checks are unchanged.
    assert isinstance(data, np.ndarray)
    assert isinstance(target, np.ndarray)
    assert data.ndim == 2 and target.ndim == 1
    assert data.shape[0] == target.shape[0]
    assert data.size > 0
    assert data.dtype == np.float64
    assert np.all(np.isfinite(data))  # also catch nan
    assert target.dtype == (np.int_ if self.is_classifier else np.float64)
    assert np.all(np.isfinite(target))  # also catch nan

    model_lookup = MODELS_CLF if self.is_classifier else MODELS_REG
    base_model, fixed_params, api_config = model_lookup[model]

    # New members for model
    self.base_model = base_model
    self.fixed_params = fixed_params
    self.api_config = api_config

    # Always shuffle your data to be safe. Use fixed seed for reprod.
    self.data_X, self.data_Xt, self.data_y, self.data_yt = train_test_split(
        data, target, test_size=0.2, random_state=shuffle_seed, shuffle=True
    )

    assert metric in METRICS, "Unknown metric %s" % metric
    assert metric in METRICS_LOOKUP[problem_type], "Incompatible metric %s with problem type %s" % (
        metric,
        problem_type,
    )
    # Translate our short metric name into the sklearn scorer object
    self.scorer = get_scorer(SklearnModel._METRIC_MAP[metric])
@staticmethod
def inverse_test_case_str(test_case):
    """Split a combined test case string back into its ``(model, dataset, scorer)`` parts.

    This is the inverse of `test_case_str`. The string is split on `ARG_DELIM` and must yield exactly three
    pieces. As a sanity check, the pieces are re-joined with `test_case_str` and must reproduce the input.
    """
    pieces = test_case.split(ARG_DELIM)
    model, dataset, scorer = pieces
    # Round trip must be exact, otherwise the string was not produced by test_case_str
    rebuilt = SklearnModel.test_case_str(model, dataset, scorer)
    assert test_case == rebuilt
    return model, dataset, scorer
def __init__(self, model, dataset, scorer, path):
    """Build class that wraps sklearn classifier/regressor CV score for use as an objective function surrogate.

    Parameters
    ----------
    model : str
        Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG` dict depending on if dataset is
        classification or regression.
    dataset : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of custom csv file.
    scorer : str
        Which sklearn scoring metric to use, in `SCORERS_CLF` list or `SCORERS_REG` dict depending on if dataset
        is classification or regression.
    path : str or bytes
        Root directory to look for all pickle files. The file loaded from it is named
        ``SklearnModel.test_case_str(model, dataset, scorer) + ".pkl"``. If a `bytes` object is passed instead,
        it is unpickled directly as the surrogate model and no file is read.
    """
    TestFunction.__init__(self)

    # Find the space class, we could consider putting this in pkl too
    problem_type = get_problem_type(dataset)
    assert problem_type in (ProblemType.clf, ProblemType.reg)
    _, _, self.api_config = MODELS_CLF[model] if problem_type == ProblemType.clf else MODELS_REG[model]
    self.space = JointSpace(self.api_config)

    # Load the pre-trained model
    fname = SklearnModel.test_case_str(model, dataset, scorer) + ".pkl"

    # NOTE(review): unpickling can execute arbitrary code, so `path` (or the bytes passed in its place) must
    # come from a trusted source.
    if isinstance(path, bytes):
        # This is for test-ability, we could use mock instead.
        self.model = pkl.loads(path)
    else:
        path = os.path.join(path, fname)  # pragma: io
        assert os.path.isfile(path), "Model file not found: %s" % path
        with absopen(path, "rb") as f:  # pragma: io
            self.model = pkl.load(f)  # pragma: io
    # Whatever was unpickled must at least expose a callable `predict`
    assert callable(getattr(self.model, "predict", None))
# Data type used for every array in the warped (continuous) space. The explicit np.float64 is used because
# the np.float_ alias was removed in NumPy 2.0; on older NumPy both names are the same type.
WARPED_DTYPE = np.float64
# Default number of grid points used by `Space.grid` when a space is defined by a range
N_GRID_DEFAULT = 8

# Categorical values are stored as unicode strings (dtype kind 'U'), which is the native `str` type.
# np.str_ is used because the np.unicode_ alias was removed in NumPy 2.0; on older NumPy under Python 3
# both names are the same type.
# Note: if this is ever switched to a bytes type, the doc-strings need to be updated as well!
CAT_DTYPE = np.str_
CAT_KIND = "U"
CAT_NATIVE_DTYPE = str
# Check to make sure consistent
assert CAT_KIND == np.dtype(CAT_DTYPE).kind
_infered = type(CAT_DTYPE("").item())
assert CAT_NATIVE_DTYPE == _infered
This saves the computational cost of calling :func:`numpy:numpy.unique`. dtype : type Desired data of feature array. One-hot is most logically `bool`, but feature matrices are usually `float`. assume_valid : bool If true, assume all element of `X` are in the list `labels`. This saves the computational cost of verifying `X` are in `labels`. If true and a non-label `X` occurs this routine will silently give bogus result. Returns ------- Y : :class:`numpy:numpy.ndarray` of shape (..., n) One-hot encoding of `X`. Extra dimension is appended at end for the one-hot vector. It has data type `dtype`. """ X = np.asarray(X) labels = np.asarray(labels) if assume_sorted else np.unique(labels) check_array(labels, "labels", pre=True, ndim=1, min_size=1) idx = np.searchsorted(labels, X) # If x is not even in labels then this will fail. This is not ValueError # because the user explictly asked for this using argument assume_valid. assert assume_valid or np.all(np.asarray(labels[idx]) == X) # This is using some pro np indexing technique to vectorize across all # possible input dimensions for X in the same code. Y = np.zeros(X.shape + (len(labels),), dtype=dtype) Y[unravel_index(X.shape) + (idx.ravel(),)] = True return Y def decode(Y, labels, assume_sorted=False): """Perform inverse of one-hot encoder `encode`. Parameters ---------- Y : :class:`numpy:numpy.ndarray` of shape (..., n) One-hot encoding of categorical data `X`. Extra dimension is appended at end for the one-hot vector. Maximum element is taken if there is more than one non-zero entry in one-hot vector. labels : :class:`numpy:numpy.ndarray` of shape (n,) Complete list of all possible labels. List is flattened if it is not already 1-dimensional. assume_sorted : bool If true, assume labels is already sorted and unique. This saves the computational cost of calling :func:`numpy:numpy.unique`. Returns ------- X : :class:`numpy:numpy.ndarray` of shape (...) Categorical values corresponding to one-hot encoded `Y`. 
""" Y = np.asarray(Y) labels = np.asarray(labels) if assume_sorted else np.unique(labels) check_array(labels, "labels", pre=True, ndim=1, min_size=1) check_array(Y, "Y", pre=True, shape_endswith=(len(labels),)) idx = np.argmax(Y, axis=-1) X = labels[idx] return X def _error(msg, pre=False): # pragma: validator """Helper routine for :func:`.check_array`. This could probably be made cleaner by using raise to create the assert. """ if pre: raise ValueError(msg) else: assert False, msg def check_array( X, name, pre=False, ndim=None, shape=None, shape_endswith=(), min_size=0, dtype=None, kind=None, allow_infinity=True, allow_nan=True, unsorted=True, whitelist=None, ): # pragma: validator """Like :func:`sklearn:sklearn.utils.check_array` but better. Check specified property of input array `X`. If an argument is not specified it passes by default. Parameters ---------- X : :class:`numpy:numpy.ndarray` `numpy` array we want to validate. name : str Human readable name of of variable to refer to it in error messages. Note this can include spaces unlike simply using the variable name. pre : bool If true, interpret this as check as validating pre-conditions to a function and will raise a `ValueError` if a check fails. If false, assumes we are checking post-conditions and will raise an assertion failure. ndim : int Expected value of ``X.ndim``. shape : tuple(int) Expected value of ``X.shape``. shape_endswith : tuple(int) Expected that ``X.shape`` ends with `shape_endswith`. This is useful in broadcasting where extra dimensions might be added on. min_size : int Minimum value for ``X.size`` dtype : dtype Expected value of ``X.dtype``. kind : str Expected value of ``X.dtype.kind``. This is `'f'` for `float`, `'i'` for `int`, and so on. allow_infinity : bool If false, the check fails when `X` contains inf or ``-inf``. allow_nan : bool If false, the check fails when `X` contains a ``NaN``. unsorted : bool If false, the check fails when `X` is not in sorted order. 
This is designed to even work with string arrays. whitelist : :class:`numpy:numpy.ndarray` Array containing allowed values for `X`. If an element of `X` is not found in `whitelist`, the check fails. """ if (ndim is not None) and X.ndim != ndim: _error("Expected %d dimensions for %s, got %d" % (ndim, name, X.ndim), pre) if (shape is not None) and X.shape != shape: _error("Expected shape %s for %s, got %s" % (str(shape), name, str(X.shape)), pre) if len(shape_endswith) > 0: if X.shape[-len(shape_endswith) :] != shape_endswith: if len(shape_endswith) == 1: _error("Expected shape (..., %d) for %s, got %s" % (shape_endswith[0], name, str(X.shape)), pre) else: _error("Expected shape (..., %s for %s, got %s" % (str(shape_endswith)[1:], name, str(X.shape)), pre) if (min_size > 0) and (X.size < min_size): _error("%s needs at least %d elements, it has %d" % (name, min_size, X.size), pre) if (dtype is not None) and X.dtype != np.dtype(dtype): _error("Expected dtype %s for %s, got %s" % (str(np.dtype(dtype)), name, str(X.dtype)), pre) if (kind is not None) and X.dtype.kind != kind: _error("Expected array with kind %s for %s, got %s" % (kind, name, str(X.dtype.kind)), pre) if (not allow_infinity) and np.any(np.abs(X) == np.inf): _error("Infinity is not allowed in %s" % name, pre) if (not allow_nan) and np.any(np.isnan(X)): _error("NaN is not allowed in %s" % name, pre) if whitelist is not None: ok = np.all([xx in whitelist for xx in np.nditer(X, ["zerosize_ok"])]) if not ok: _error("Expected all elements of %s to be in %s" % (name, str(whitelist)), pre) # Only do this check in 1D if X.ndim == 1 and (not unsorted) and np.any(X[:-1] > X[1:]): _error("Expected sorted input for %s" % name, pre) # ============================================================================ # Setup warping dictionaries # ============================================================================ def identity(x): """Helper function that perform warping in linear space. Sort of a no-op. 
def bilog(x):
    """Bilog warping function. Extension of log to work with negative numbers.

    ``Bilog(x) ~= log(x)`` for large `x` or ``-log(abs(x))`` if `x` is negative. However, the bias term ensures
    good behavior near 0 and ``bilog(0) = 0``.

    Parameters
    ----------
    x : scalar
        Input variable in linear space. Can be any numeric type and is vectorizable.

    Returns
    -------
    y : float
        The bilog of `x`, that is ``sign(x) * log(1 + abs(x))``.
    """
    # log1p keeps full precision when abs(x) is tiny, where 1.0 + abs(x) would round to exactly 1.0
    y = np.sign(x) * np.log1p(np.abs(x))
    return y


def biexp(x):
    """Inverse of :func:`.bilog` function.

    Parameters
    ----------
    x : scalar
        Input variable in the bilog warped space. Can be any numeric type and is vectorizable.

    Returns
    -------
    y : float
        The biexp of `x`, that is ``sign(x) * (exp(abs(x)) - 1)``.
    """
    # expm1 keeps full precision when abs(x) is tiny, where exp(abs(x)) - 1.0 would cancel to exactly 0.0
    y = np.sign(x) * np.expm1(np.abs(x))
    return y
""" self.dtype = dtype assert warp in WARP_DICT, "invalid space %s, allowed spaces are: %s" % (str(warp), str(WARP_DICT.keys())) self.warp_f = WARP_DICT[warp] self.unwarp_f = UNWARP_DICT[warp] # Setup range and rounding if values is suplied assert (values is None) != (range_ is None) round_to_values = default_round if range_ is None: # => value is not None # Debatable if unique should be done before or after cast. But I # think after is better, esp. when changing precisions. values = np.asarray(values, dtype=dtype) values = np.unique(values) # values now 1D ndarray no matter what check_array( values, "unique values", pre=True, ndim=1, dtype=dtype, min_size=2, allow_infinity=False, allow_nan=False, ) # Extrapolation might happen due to numerics in type conversions. # Bounds checking is still done in validate routines. round_to_values = interp1d(values, values, kind="nearest", fill_value="extrapolate") range_ = (values[0], values[-1]) # Save values and rounding # Values is either None or was validated inside if statement self.values = values self.round_to_values = round_to_values # Note that if dtype=None that is the default for asarray. 
def validate(self, X, pre=False):
    """Check (and cast) points in the original space before they are warped.

    The input is cast to ``self.dtype``. If the space was built from explicit values, every element must be
    one of ``self.values``. Otherwise the input is passed through `clip_chk` with the ``(lower, upper)``
    range of the space. No check is made on the dimensionality of `X`; the routine is fully vectorized.

    `pre` is forwarded to `check_array` and is only used in the explicit-values case.
    """
    X = np.asarray(X, dtype=self.dtype)
    if self.values is not None:
        # Discrete space: membership test against the allowed values
        check_array(X, "X", pre=pre, whitelist=self.values)
        return X
    # Range space: bounds handling is delegated to clip_chk
    return clip_chk(X, self.lower, self.upper)
def unwarp(self, X_w):
    """Map points from the warped space back to the original space (inverse of `warp`).

    Parameters
    ----------
    X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
        Points in the warped space. This is vectorized to work in any dimension but, by convention, the warped
        array carries one extra trailing dimension whose length must equal ``len(self.lower_warped)``.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of shape (...)
        Unwarped version of `X_w`, with the trailing warped dimension removed and data type ``self.dtype``.
    """
    warped = self.validate_warped(X_w, pre=True)
    # Drop the trailing warped dimension and undo the warping function
    raw = self.unwarp_f(warped[..., 0])
    clipped = clip_chk(raw, self.lower, self.upper)
    # Snap to the allowed values (or round), then cast to the dtype of the space
    X = self.validate(self.round_to_values(clipped))
    check_array(X, "X", ndim=warped.ndim - 1, dtype=self.dtype)
    return X
class Real(Space):
    """Space for transforming real variables to normalized space (after warping).
    """

    def __init__(self, warp="linear", values=None, range_=None):
        """Build Real space class.

        Parameters
        ----------
        warp : {'linear', 'log', 'logit', 'bilog'}
            Which warping type to apply to the space. The warping is applied in the original space. That is, in
            a space with ``warp='log'`` and ``range_=(2.0, 10.0)``, the value 2.0 warps to ``log(2)``, not
            ``-inf`` as in some other frameworks.
        values : None or list(float)
            Possible values for space to take. Values must be of `float` type.
        range_ : None or :class:`numpy:numpy.ndarray` of shape (2,)
            Array with (lower, upper) pair with limits of space. Note that one must specify `values` or
            `range_`, but not both. `range_` must be composed of `float`.
        """
        assert warp is not None, "warp/space not specified for real"
        # np.float64 is spelled out because the np.float_ alias was removed in NumPy 2.0; on older NumPy
        # both names are the same type. Real values need no rounding, hence `identity`.
        Space.__init__(self, np.float64, identity, warp, values, range_)
def __init__(self, warp=None, values=None, range_=None):
    """Build Categorical space class.

    Parameters
    ----------
    warp : None
        Must be omitted or None, provided for consistency with other types.
    values : list(str)
        Possible values for space to take. Values must be unicode strings (dtype kind `CAT_KIND`) and there
        must be at least two distinct values. Duplicates are removed and the values are stored sorted.
    range_ : None
        Must be omitted or None, provided for consistency with other types.
    """
    assert warp is None, "cannot warp cat"
    assert values is not None, "must pass in explicit values for cat"
    assert range_ is None, "cannot pass in range for cat"

    # np.unique always gives back a sorted 1D array, whatever the input shape was
    labels = np.unique(values)
    check_array(labels, "values", pre=True, ndim=1, kind=CAT_KIND, min_size=2)
    n_labels = len(labels)

    self.dtype = CAT_DTYPE
    self.values = labels

    # No bounds in the original space; the warped space is a one-hot vector, one unit interval per label
    self.lower = None
    self.upper = None
    self.lower_warped = np.zeros(n_labels, dtype=WARPED_DTYPE)
    self.upper_warped = np.ones(n_labels, dtype=WARPED_DTYPE)

    # Encoding happens when warping. Decoding is done in the rounding step, so unwarping itself is a no-op.
    self.warp_f = self._encode
    self.unwarp_f = identity
    self.round_to_values = self._decode
def unwarp(self, X_w):
    """Map one-hot style points in the warped space back to category labels (inverse of `warp`).

    Parameters
    ----------
    X_w : :class:`numpy:numpy.ndarray` of shape (..., m)
        Points in the warped space, where the trailing dimension `m` must equal ``len(self.lower_warped)``.
        The array is validated with `validate_warped` and then decoded with ``self.round_to_values``.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of shape (...)
        Unwarped version of `X_w`, with the trailing dimension removed and dtype kind `CAT_KIND`.
    """
    warped = self.validate_warped(X_w, pre=True)
    one_hot = self.unwarp_f(warped)
    labels = self.round_to_values(one_hot)
    X = self.validate(labels)
    check_array(X, "X", ndim=warped.ndim - 1, kind=CAT_KIND)
    return X
""" def __init__(self, meta): """Build Real space class. Parameters ---------- meta : dict(str, dict) Configuration of variables in joint space. See API description. """ assert len(meta) > 0 # Unclear what to do with empty space # Lock in an order if not ordered dict, sorted helps reproducibility self.param_list = sorted(meta.keys()) # Might as well pre-validate a bit here for param, config in meta.items(): assert config["type"] in SPACE_DICT, "invalid input type %s" % config["type"] spaces = { param: SPACE_DICT[config["type"]]( config.get("space", None), config.get("values", None), config.get("range", None) ) for param, config in meta.items() } self.spaces = spaces self.blocks = np.cumsum([len(spaces[param].get_bounds()) for param in self.param_list]) def validate(self, X): """Raise `ValueError` if X does not match the format expected for a joint space.""" for record in X: if self.param_list != sorted(record.keys()): raise ValueError("Expected joint space keys %s, but got %s", (self.param_list, sorted(record.keys()))) for param in self.param_list: self.spaces[param].validate([record[param]], pre=True) # Return X back so we have option to cast it to list or whatever later return X def warp(self, X): """Warp inputs to a continuous space. Parameters ---------- X : list(dict(str, object)) of shape (n,) List of `n` points in the joint space to warp. Each list element is a dictionary where each key corresponds to a variable in the joint space. Keys can be be missing in the records and the according warped variables will be ``nan``. Returns ------- X_w : :class:`numpy:numpy.ndarray` of shape (n, m) Warped version of input space. Result is 2D `float` np array. `n` is the number of input points, length of `X`. `m` is the size of the joint warped space, which can be inferred by calling :func:`.get_bounds`. 
""" # It would be nice to have cleaner way to deal with this corner case if len(X) == 0: return np.zeros((0, self.blocks[-1]), dtype=WARPED_DTYPE) X_w = [ np.concatenate( [ self.spaces[param].warp(record[param]) if param in record else np.full(len(self.spaces[param].get_bounds()), np.nan) for param in self.param_list ] ) for record in X ] X_w = np.stack(X_w, axis=0) check_array(X_w, "X", shape=(len(X), self.blocks[-1]), dtype=WARPED_DTYPE) return X_w def unwarp(self, X_w, fixed_vals={}): """Inverse of :func:`.warp`. Parameters ---------- X_w : :class:`numpy:numpy.ndarray` of shape (n, m) Warped version of input space. Must be 2D `float` :class:`numpy:numpy.ndarray`. `n` is the number of separate points in the warped joint space. `m` is the size of the joint warped space, which can be inferred in advance by calling :func:`.get_bounds`. fixed_vals : dict Subset of variables we want to keep fixed in X. Unwarp checks that the unwarped version of `X_w` matches `fixed_vals` up to numerical error. Otherwise, an error is raised. Returns ------- X : list(dict(str, object)) of shape (n,) List of `n` points in the joint space to warp. Each list element is a dictionary where each key corresponds to a variable in the joint space. """ X_w = np.asarray(X_w) check_array(X_w, "X", ndim=2, shape_endswith=(self.blocks[-1],), dtype=WARPED_DTYPE) N = X_w.shape[0] # Use snap_to to make sure we get exact value (no-round off) for cases where we know expected answer X = { param: snap_to(self.spaces[param].unwarp(xx), fixed_vals.get(param, None)) for param, xx in zip(self.param_list, np.hsplit(X_w, self.blocks[:-1])) } # Convert dict of arrays to list of dicts, this would not be needed if # we used pandas but we do not want to add it as a dep. np.asscalar and # .item() appear to be the same thing but asscalar seems more readable. X = [{param: X[param][ii].item() for param in self.param_list} for ii in range(N)] return X def get_bounds(self): """Get bounds of the warped joint space. 
Returns ------- bounds : :class:`numpy:numpy.ndarray` of shape (m, 2) Bounds in the warped space. First column is the lower bound and the second column is the upper bound. ``bounds.tolist()`` gives the bounds in the standard form expected by scipy optimizers: ``[(lower_1, upper_1), ..., (lower_n, upper_n)]``. """ bounds = np.concatenate([self.spaces[param].get_bounds() for param in self.param_list], axis=0) check_array(bounds, "bounds", shape_endswith=(2,), dtype=WARPED_DTYPE) return bounds def grid(self, max_interp=N_GRID_DEFAULT): """Return grid spanning the original (unwarped) space. Parameters ---------- max_interp : int The number of points to use in grid space when a range and not values are used to define the space. Must be ``>= 0``. Returns ------- axes : dict(str, list) Grids spanning the original spaces of each variable. For each variable, this is simply ``self.values`` if a grid has already been specified, otherwise it is just grid across the range. """ axes = {var_name: space.grid(max_interp=max_interp) for var_name, space in self.spaces.items()} return axes ================================================ FILE: bayesmark/stats.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """General statistic tools useful in the benchmark. 
""" import numpy as np import scipy.stats as sst def robust_standardize(X, q_level=0.5): """Perform robust standardization of data matrix `X` over axis 0. Similar to :func:`sklearn:sklearn.preprocessing.robust_scale` except also does a Gaussian adjustment rescaling so that if Gaussian data is passed in the transformed data will, in large `n`, be distributed as N(0,1). See sklearn feature request #10139 on github. Parameters ---------- X : :class:`numpy:numpy.ndarray` of shape (n, ...) Array containing elements standardize. Require ``n >= 2``. q_level : scalar Must be in [0, 1]. Inter-quartile range to use for scale estimation. Returns ------- X : :class:`numpy:numpy.ndarray` of shape (n, ...) Elements of input `X` standardization. """ X = np.asarray(X) assert X.ndim in (1, 2) assert np.all(np.isfinite(X)) assert 0.0 < q_level and q_level <= 1.0 assert X.shape[0] >= 2 mu = np.median(X, axis=0) q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level) v = np.percentile(X, 100 * q1, axis=0) - np.percentile(X, 100 * q0, axis=0) v = np.asarray(v) v[v == 0.0] = 1.0 X_ss = (X - mu) / v # Rescale to match scale of N(0,1) X_ss = X_ss * (sst.norm.ppf(q1) - sst.norm.ppf(q0)) assert X.shape == X_ss.shape return X_ss def t_EB(x, alpha=0.05, axis=-1): """Get t-statistic based error bars on mean of `x`. Parameters ---------- x : :class:`numpy:numpy.ndarray` of shape (n_samples,) Data points to estimate mean. Must not be empty or contain ``NaN``. alpha : float The alpha level (``1-confidence``) probability (in (0, 1)) to construct confidence interval from t-statistic. axis : int The axis on `x` where we compute the t-statistics. The function is vectorized over all other dimensions. Returns ------- EB : float Size of error bar on mean (``>= 0``). The confidence interval is ``[mean(x) - EB, mean(x) + EB]``. `EB` is ``inf`` when ``len(x) <= 1``. Will be ``NaN`` if there are any infinite values in `x`. 
""" assert np.ndim(x) >= 1 and (not np.any(np.isnan(x))) assert np.ndim(alpha) == 0 assert 0.0 < alpha and alpha < 1.0 N = np.shape(x)[axis] if N <= 1: return np.full(np.sum(x, axis=axis).shape, fill_value=np.inf) confidence = 1 - alpha # loc cancels out when we just want EB anyway LB, UB = sst.t.interval(confidence, N - 1, loc=0.0, scale=1.0) assert not (LB > UB) # Just multiplying scale=ss.sem(x) is better for when scale=0 EB = 0.5 * sst.sem(x, axis=axis) * (UB - LB) return EB ================================================ FILE: bayesmark/util.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """General utilities that should arguably be included in Python. """ import shlex def in_or_none(x, L): """Check if item is in list of list is None.""" return (L is None) or (x in L) def all_unique(L): """Check if all elements in a list are unique. Parameters ---------- L : list List we would like to check for uniqueness. Returns ------- uniq : bool True if all elements in `L` are unique. """ uniq = len(L) == len(set(L)) return uniq def strict_sorted(L): """Return a strictly sorted version of `L`. Therefore, this raises an error if `L` contains duplicates. Parameters ---------- L : list List we would like to sort. Returns ------- S : list Strictly sorted version of `L`. """ assert all_unique(L), "Cannot strict sort because list contains duplicates." 
S = sorted(L) return S def range_str(stop): """Version of ``range(stop)`` that instead returns strings that are zero padded so the entire iteration is of the same length. Parameters ---------- stop : int Stop value equivalent to ``range(stop)``. Yields ------ x : str String representation of integer zero padded so all items from this generator have the same ``len(x)``. """ str_len = len(str(stop - 1)) # moot if stop=0 def map_(x): ss = str(x).zfill(str_len) return x, ss G = map(map_, range(stop)) return G def str_join_safe(delim, str_vec, append=False): """Version of `str.join` that is guaranteed to be invertible. Parameters ---------- delim : str Delimiter to join the strings. str_vec : list(str) List of strings to join. A `ValueError` is raised if `delim` is present in any of these strings. append : bool If true, assume the first element is already joined and we are appending to it. So, `str_vec[0]` can contain `delim`. Returns ------- joined_str : str Joined version of `str_vec`, which is always recoverable with ``joined_str.split(delim)``. Examples -------- Append is required because, .. code-block:: pycon ss = str_join_safe('_', ('foo', 'bar')) str_join_safe('_', (ss, 'baz', 'qux')) would fail because we are appending ``'baz'`` and ``'qux'`` to the already joined string ``ss = 'foo_bar'``. In this case, we use .. code-block:: pycon ss = str_join_safe('_', ('foo', 'bar')) str_join_safe('_', (ss, 'baz', 'qux'), append=True) """ chk_vec = str_vec[1:] if append else str_vec for ss in chk_vec: if delim in ss: raise ValueError("%s cannot contain delimeter %s" % (ss, delim)) joined_str = delim.join(str_vec) return joined_str def shell_join(argv, delim=" "): """Join strings together in a way that is an inverse of `shlex` shell parsing into `argv`. Basically, if the resulting string is passed as a command line argument then `sys.argv` will equal `argv`. Parameters ---------- argv : list(str) List of arguments to collect into command line string. 
It will be escaped accordingly. delim : str Whitespace delimiter to join the strings. Returns ------- cmd : str Properly escaped and joined command line string. """ vv = [shlex.quote(vv) for vv in argv] cmd = delim.join(vv) assert shlex.split(cmd) == list(argv) return cmd def chomp(str_val, ext="\n"): """Chomp a suffix off a string. Parameters ---------- str_val : str String we want to chomp off a suffix, e.g., ``"foo.log"``, and we want to chomp the file extension. ext : str The suffix we want to chomp. An error is raised if `str_val` doesn't end in `ext`. Returns ------- chomped : str Version of `str_val` with `ext` removed from the end. """ n = len(ext) assert n > 0 chomped, ext_ = str_val[:-n], str_val[-n:] assert ext == ext_, "%s must end with %s" % (repr(str_val), repr(ext)) return chomped def preimage_func(f, x): """Pre-image a funcation at a set of input points. Parameters ---------- f : typing.Callable The function we would like to pre-image. The output type must be hashable. x : typing.Iterable Input points we would like to evaluate `f`. `x` must be of a type acceptable by `f`. Returns ------- D : dict(object, list(object)) This dictionary maps the output of `f` to the list of `x` values that produce it. """ D = {} for xx in x: D.setdefault(f(xx), []).append(xx) return D ================================================ FILE: bayesmark/xr_util.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
"""General utilities for `xarray` that should be included in `xarray`.
"""
from collections import OrderedDict

import numpy as np
import pandas as pd
import xarray as xr

from bayesmark.util import all_unique


def is_simple_coords(coords, min_side=0, dims=None):
    """Check if all xr coordinates are "simple". That is, equals to ``np.arange(n)``.

    Parameters
    ----------
    coords : dict-like of coordinates
        The coordinates we would like to check, e.g. from ``DataArray.coords``.
    min_side : int
        The minimum side requirement. We can set this ``min_side=1`` and have empty coordinates result in a return
        value of ``False``.
    dims : None or list of dimension names
        Dimensions we want to check for simplicity. If ``None``, check all dimensions.

    Returns
    -------
    simple : bool
        True when all coordinates are simple.
    """
    for kk in coords:
        if (dims is None) or (kk in dims):
            C = coords[kk].values
            # Not checking dtype on empty coords, could check that too if we want to be strict
            if len(C) > 0 and C.dtype != np.int_:
                return False
            C = C.tolist()
            if len(C) < min_side:
                return False
            if C != list(range(len(C))):
                return False
    return True


def ds_like(ref, vars_, dims, fill=np.nan):
    """Produce a blank :class:`xarray:xarray.Dataset` copying some coordinates from another
    :class:`xarray:xarray.Dataset`.

    Parameters
    ----------
    ref : :class:`xarray:xarray.Dataset`
        The reference dataset we want to copy coordinates from.
    vars_ : typing.Iterable
        List of variable names we want in the new dataset.
    dims : list
        List of dimensions we want to copy over from `ref`. These are the dimensions of the output.
    fill : scalar
        Scalar value to fill the blank dataset. The `dtype` will be determined from the `fill` value.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.
        All values are filled with `fill`.
    """
    size = [ref.sizes[dd] for dd in dims]

    # Use OrderedDict for good measure, probably not needed
    data = OrderedDict([(vv, (dims, np.full(size, fill))) for vv in vars_])
    coords = OrderedDict([(dd, ref.coords[dd].values) for dd in dims])

    ds = xr.Dataset(data, coords=coords)
    return ds


def ds_like_mixed(ref, vars_, dims, fill=np.nan):
    """The same as `ds_like` but allow different dimensions for each variable.

    Parameters
    ----------
    ref : :class:`xarray:xarray.Dataset`
        The reference dataset we want to copy coordinates from.
    vars_ : typing.Iterable
        List of (variable names, dimension) pairs we want in the new dataset. The dimensions for each variable must
        be a subset of `dims`.
    dims : list
        List of all dimensions we want to copy over from `ref`.
    fill : scalar
        Scalar value to fill the blank dataset. The `dtype` will be determined from the `fill` value.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        A new dataset with variables `vars_` and dimensions `dims` where the coordinates have been copied from `ref`.
        All values are filled with `fill`.
    """
    coords = OrderedDict([(dd, ref.coords[dd].values) for dd in dims])

    data = OrderedDict()
    for var_name, var_dims in vars_:
        assert set(var_dims).issubset(dims)
        size = [ref.sizes[dd] for dd in var_dims]
        # Honor the requested fill value rather than always filling with NaN
        data[var_name] = (var_dims, np.full(size, fill))

    ds = xr.Dataset(data, coords=coords)
    return ds


def only_dataarray(ds):
    """Convert a :class:`xarray:xarray.Dataset` to a :class:`xarray:xarray.DataArray`. If the
    :class:`xarray:xarray.Dataset` has more than one variable, an error is raised.

    Parameters
    ----------
    ds : :class:`xarray:xarray.Dataset`
        :class:`xarray:xarray.Dataset` we would like to convert to a :class:`xarray:xarray.DataArray`. This must
        contain only one variable.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        The :class:`xarray:xarray.DataArray` extracted from `ds`.
    """
    # Single-element unpacking raises ValueError unless there is exactly one variable
    name, = ds
    da = ds[name]
    return da


def coord_compat(da_seq, dims):
    """Check if a sequence of :class:`xarray:xarray.DataArray` have compatible coordinates.

    Parameters
    ----------
    da_seq : list(:class:`xarray:xarray.DataArray`)
        Sequence of :class:`xarray:xarray.DataArray` we would like to check for compatibility.
        :class:`xarray:xarray.Dataset` work too.
    dims : list
        Subset of all dimensions in the :class:`xarray:xarray.DataArray` we are concerned with for compatibility.

    Returns
    -------
    compat : bool
        True if all the :class:`xarray:xarray.DataArray` have compatible coordinates.
    """
    if len(da_seq) <= 1:
        return True

    ref = da_seq[0]
    for da in da_seq:
        # There is probably a better way to do this by attempting concat in try-except, but good enough for now:
        for dd in dims:
            assert dd in da.coords, "dim %s missing in dataarray" % dd
            # array_equal also compares shapes, so coordinates of different length are reported as incompatible
            # instead of being broadcast (or raising) inside an elementwise ==
            if not np.array_equal(ref.coords[dd].values, da.coords[dd].values):
                return False
    return True


def da_to_string(da):
    """Generate a human readable version of a 1D :class:`xarray:xarray.DataArray`.

    Parameters
    ----------
    da : :class:`xarray:xarray.DataArray`
        The :class:`xarray:xarray.DataArray` to display. Must only have one dimension.

    Returns
    -------
    str_val : str
        String with human readable version of `da`.
    """
    assert len(da.dims) == 1
    str_val = da.to_series().to_string()
    return str_val


def da_concat(da_dict, dims):
    """Concatenate a dictionary of :class:`xarray:xarray.DataArray` similar to :func:`pandas:pandas.concat`.

    Parameters
    ----------
    da_dict : dict(tuple(str), :class:`xarray:xarray.DataArray`)
        Dictionary of :class:`xarray:xarray.DataArray` to combine. The keys are tuples of index values. The
        :class:`xarray:xarray.DataArray` must have compatible coordinates.
    dims : list(str)
        The names of the new dimensions we create for the dictionary keys. This must be of the same length as the
        key tuples in `da_dict`.

    Returns
    -------
    da : :class:`xarray:xarray.DataArray`
        Combined data array. The new dimensions will be ``input_da.dims + dims``.
    """
    assert len(da_dict) > 0
    assert all(len(da.dims) > 0 for da in da_dict.values()), "0-dimensional DataArray not supported"
    assert all_unique(dims)

    cur_dims = list(da_dict.values())[0].dims
    assert all(da.dims == cur_dims for da in da_dict.values())
    assert len(set(cur_dims) & set(dims)) == 0

    def squeeze(tt):
        # Unwrap length-1 key tuples so the concatenated columns have one level per key element
        if len(tt) == 1:
            return tt[0]
        return tt

    D = OrderedDict([(squeeze(kk), da.to_series()) for kk, da in da_dict.items()])
    df = pd.concat(D, axis=1)
    assert df.columns.nlevels == len(dims)
    df.columns.names = dims
    # Move every column level into the index so the result is a single series
    df = df.stack(level=list(range(df.columns.nlevels)))
    assert isinstance(df, pd.Series)

    da = df.to_xarray()
    assert isinstance(da, xr.DataArray)
    return da


def ds_concat(ds_dict, dims):
    """Concatenate a dictionary of :class:`xarray:xarray.Dataset` similar to :func:`pandas:pandas.concat`, and a
    generalization of :func:`.da_concat`.

    Parameters
    ----------
    ds_dict : dict(tuple(str), :class:`xarray:xarray.Dataset`)
        Dictionary of :class:`xarray:xarray.Dataset` to combine. The keys are tuples of index values. The
        :class:`xarray:xarray.Dataset` must have compatible coordinates, and all have the same variables.
    dims : list(str)
        The names of the new dimensions we create for the dictionary keys. This must be of the same length as the
        key tuples in `ds_dict`.

    Returns
    -------
    ds : :class:`xarray:xarray.Dataset`
        Combined dataset. For each variable `var`, the new dimensions will be ``input_ds[var].dims + dims``.
    """
    assert len(ds_dict) > 0
    assert len(dims) > 0
    assert all(len(kk) == len(dims) for kk in ds_dict)

    # Get an arbitrary element as the reference
    k0 = next(iter(ds_dict))

    # Check all vars the same, the unpacking fails unless every dataset has the same tuple of variable names
    vars_, = {tuple(ds) for ds in ds_dict.values()}

    # Now combine da for each variable, one at a time
    ds = xr.Dataset(coords=ds_dict[k0].coords)
    for vv in vars_:
        da_dict = OrderedDict([(kk, da[vv]) for kk, da in ds_dict.items()])
        ds[vv] = da_concat(da_dict, dims)
    return ds
""" assert len(ds_dict) > 0 assert len(dims) > 0 assert all(len(kk) == len(dims) for kk in ds_dict) # Get an arbitrary element as the reference k0 = list(ds_dict.keys())[0] # Check all vars the same vars_, = set([tuple(ds) for ds in ds_dict.values()]) # Now combine da for each variable, one at a time ds = xr.Dataset(coords=ds_dict[k0].coords) for vv in vars_: da_dict = OrderedDict([(kk, da[vv]) for kk, da in ds_dict.items()]) ds[vv] = da_concat(da_dict, dims) return ds ================================================ FILE: build_wheel.sh ================================================ #!/bin/bash set -ex set -o pipefail # Display what version is being used for logging python --version # Fail if untracked files so we don't delete them in next step test -z "$(git status --porcelain)" # Build from clean repo, delete all ignored files git clean -x -f -d # Get everything in place to put inside the wheel SHA_LONG=$(git rev-parse HEAD) echo VERSION=\"$SHA_LONG\" >bayesmark/version.py # Now the actual build python3 setup.py sdist ================================================ FILE: docs/.gitignore ================================================ _build ================================================ FILE: docs/Makefile ================================================ # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. 
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck all

# Print the list of available build targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"

# Remove everything under the build directory.
clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bayesmark.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bayesmark.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/bayesmark" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bayesmark" @echo "# devhelp" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." all: html dirhtml singlehtml pickle json htmlhelp qthelp devhelp latex latexpdf text man texinfo info gettext changes xml pseudoxml linkcheck ================================================ FILE: docs/authors.rst ================================================ ------- Credits ------- ~~~~~~~~~~~~~~~~ Development lead ~~~~~~~~~~~~~~~~ Ryan Turner (rdturnermtl) ~~~~~~~~~~~~ Contributors ~~~~~~~~~~~~ * David Eriksson (dme65) ================================================ FILE: docs/code.rst ================================================ ------------- Code Overview ------------- .. _bayesmark: ~~~~ Data ~~~~ .. automodule:: bayesmark.data :members: :exclude-members: ~~~~~~~~~~~~~~~~~~~~~~~ Expected Max Estimation ~~~~~~~~~~~~~~~~~~~~~~~ .. 
automodule:: bayesmark.expected_max :members: :exclude-members: ~~~~~~~~~~~~~~~~~~~~~~ Experiment Aggregation ~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.experiment_aggregate :members: :exclude-members: main ~~~~~~~~~~~~~~~~~~~ Experiment Analysis ~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.experiment_analysis :members: :exclude-members: main ~~~~~~~~~~~~~~~~~~~ Experiment Baseline ~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.experiment_baseline :members: :exclude-members: main, do_baseline ~~~~~~~~~~~~~~~~~~~ Experiment Launcher ~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.experiment_launcher :members: :exclude-members: main ~~~~~~~~~~ Experiment ~~~~~~~~~~ .. automodule:: bayesmark.experiment :members: :exclude-members: experiment_main ~~~~~~~~~~~~~~~~~~~ Function Signatures ~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.signatures :members: :exclude-members: ~~~~~~~~~~ Numpy Util ~~~~~~~~~~ .. automodule:: bayesmark.np_util :members: :exclude-members: ~~~~~~~~~ Path Util ~~~~~~~~~ .. automodule:: bayesmark.path_util :members: :exclude-members: ~~~~~~~~~~~~~~~~~~~ Quantile Estimation ~~~~~~~~~~~~~~~~~~~ .. automodule:: bayesmark.quantiles :members: :exclude-members: ensure_shape ~~~~~~~~~~~~~ Random Search ~~~~~~~~~~~~~ .. automodule:: bayesmark.random_search :members: :exclude-members: ~~~~~~~~~~~~~ Serialization ~~~~~~~~~~~~~ .. automodule:: bayesmark.serialize :members: :exclude-members: Serializer ~~~~~~~~~~~~~~ Sklearn Tuning ~~~~~~~~~~~~~~ .. automodule:: bayesmark.sklearn_funcs :members: :exclude-members: ~~~~~ Space ~~~~~ .. automodule:: bayesmark.space :members: :exclude-members: check_array, unravel_index ~~~~~ Stats ~~~~~ .. automodule:: bayesmark.stats :members: :exclude-members: ~~~~~~~~~~~~~~ Util (General) ~~~~~~~~~~~~~~ .. automodule:: bayesmark.util :members: :exclude-members: ~~~~~~~~~~~ Xarray Util ~~~~~~~~~~~ .. 
automodule:: bayesmark.xr_util :members: :exclude-members: ================================================ FILE: docs/conf.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # # bayesmark documentation build configuration file. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import os import sys # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory is # relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) sys.path.append(os.path.join(os.path.dirname(__file__), "..")) # Get the project root dir, which is the parent dir of this cwd = os.getcwd() project_root = os.path.dirname(cwd) # Insert the project root dir as the first element in the PYTHONPATH. # This lets us ensure that the source package is imported, and that its # version is used. sys.path.insert(0, project_root) # -- General configuration --------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "sphinx.ext.napoleon"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = {".rst": "restructuredtext", ".txt": "markdown", ".md": "markdown"} # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. 
master_doc = "index" # General information about the project. project = "bayesmark" copyright = "2018-2019" # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout # the built documents. # # The short X.Y version. # version = bayesmark.__version__ # The full version, including alpha/beta/rc tags. # release = bayesmark.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to # some non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built # documents. 
# keep_warnings = False intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://docs.scipy.org/doc/numpy-1.16.1/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "xarray": ("http://xarray.pydata.org/en/stable/", None), "sklearn": ("https://scikit-learn.org/stable/", None), } # -- Options for HTML output ------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "default" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as # html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the # top of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon # of the docs. This file should be a Windows icon file (.ico) being # 16x16 or 32x32 pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) # here, relative to this directory. They are copied after the builtin # static files, so a file named "default.css" will overwrite the builtin # "default.css". html_static_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. 
# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names # to template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. # Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. # Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages # will contain a tag referring to it. The value of this option # must be the base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "bayesmark_doc" # -- Options for LaTeX output ------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass # [howto/manual]). latex_documents = [("index", "bayesmark.tex", "BO Benchmark Documentation", "Uber AI Labs", "manual")] # The name of an image file (relative to this directory) to place at # the top of the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings # are parts, not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. 
# latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output ------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [("index", "bayesmark", "bayesmark Documentation", ["Uber AI Labs"], 1)] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ---------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( "index", "bayesmark", "bayesmark Documentation", "Uber AI Labs", "bayesmark", "Benchmark of Bayesian optimization packages on real problems.", "Miscellaneous", ) ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False ================================================ FILE: docs/dummy.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import sphinx # import extra deps and use it to keep pipreqs and flake8 happy for pkg in (sphinx,): print("%s %s" % (pkg.__name__, pkg.__version__)) ================================================ FILE: docs/index.rst ================================================ .. bayesmark documentation master file, created by sphinx-quickstart on Tue Jul 9 22:26:36 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to the Bayes Opt Benchmark Documentation ================================================ Contents: .. toctree:: :maxdepth: 2 readme scoring code authors ================================================ FILE: docs/readme.rst ================================================ .. include:: ../README.rst ================================================ FILE: docs/scoring.rst ================================================ .. _how-scoring-works: How scoring works ================= The scoring system is about aggregating the function evaluations of the optimizers. We represent :math:`F_{pmtn}` as the function evaluation of objective function :math:`p` (``TEST_CASE``) from the suggestion of method :math:`m` (``METHOD``) at batch :math:`t` (``ITER``) under repeated trial :math:`n` (``TRIAL``). In the case of batch sizes greater than 1, :math:`F_{pmtn}` is the minimum function evaluation across the suggestions in batch :math:`t`. The first transformation is that we consider the *cumulative minimum* over batches :math:`t` as the performance of the optimizer on a particular trial: .. math:: S_{pmtn} = \textrm{cumm-min}_t F_{pmtn}\,. All of the aggregate quantities described here are computed by :func:`.experiment_analysis.compute_aggregates` (which is called by `bayesmark-anal <#analyze-and-summarize-results>`_) in either the ``agg_result`` or ``summary`` xarray datasets. 
Additionally, the baseline performances are in the xarray dataset ``baseline_ds`` from :func:`.experiment_baseline.compute_baseline`. The baseline dataset can be generated via the ``bayesmark-baseline`` command, but it is called automatically by ``bayesmark-anal`` if needed. Median scores ------------- The more robust, but less decision-theoretically appealing method for aggregation is to look at median scores. On a per problem basis we simply consider the median (``agg_result[PERF_MED]``): .. math:: \textrm{med-perf}_{pmt} = \textrm{median}_n \, S_{pmtn} \,. However, this score is not very comparable across different problems as the objectives are all on different scales with possibly different units. Therefore, we define the *normalized score* (``agg_result[NORMED_MED]``) in a way that is *invariant* to linear transformation of the objective function: .. math:: \textrm{norm-med-perf}_{pmt} = \frac{\textrm{med-perf}_{pmt} - \textrm{opt}_p}{\textrm{rand-med-perf}_{pt} - \textrm{opt}_p} \,, where :math:`\textrm{opt}_p` (``baseline_ds[PERF_BEST]``) is an estimate of the global minimum of objective function :math:`p`; and :math:`\textrm{rand-med-perf}_{pt}` is the median performance of random search at batch :math:`t` on objective function :math:`p`. This means that, on any objective, an optimizer has score 0 after converging to the global minimum; and random search performs as a straight line at 1 for all :math:`t`. Conceptually, the median random search performance (``baseline_ds[PERF_MED]``) is computed as: .. math:: \textrm{rand-med-perf}_{pt} = \textrm{median}_n \, S_{pmtn} \,, with :math:`m=` random search. However, every observation of :math:`F_{pmtn}` is iid in the case of random search. There is no reason to break the samples apart into trials :math:`n`. Instead, we use the function :func:`.quantiles.min_quantile_CI` to compute a more statistically efficient pooled estimator using the pooled random search samples over :math:`t` and :math:`n`.
This pooled method is a nonparametric estimator of the quantiles of the minimum over a batch of samples, which is distribution free. To further aggregate the performance over all objectives for a single optimizer we can consider the median-of-medians (``summary[PERF_MED]``): .. math:: \textrm{med-perf}_{mt} = \textrm{median}_p \, \textrm{norm-med-perf}_{pmt} \,. Combining scores across different problems is sensible here because we have transformed them all onto the same scale. Mean scores ----------- From a decision theoretical perspective it is more sensible to consider the mean (possibly warped) score. The median score can hide a high percentage of runs that completely fail. However, when we look at the mean score we first take the clipped score with a baseline value: .. math:: S'_{pmtn} = \min(S_{pmtn}, \textrm{clip}_p) \,. This is largely because there may be a non-zero probability of :math:`F = \infty` (as in when the objective function crashes), which means that mean random search performance is infinite loss. We set :math:`\textrm{clip}_p` (``baseline_ds[PERF_CLIP]``) to the median score after a single function evaluation, which is :math:`\textrm{rand-med-perf}_{p0}` for a batch size of 1. The mean performance on a single problem (``agg_result[PERF_MEAN]``) then becomes: .. math:: \textrm{mean-perf}_{pmt} = \textrm{mean}_n \, S'_{pmtn} \,. Which then becomes a normalized performance (``agg_result[NORMED_MEAN]``) of: .. math:: \textrm{norm-mean-perf}_{pmt} = \frac{\textrm{mean-perf}_{pmt} - \textrm{opt}_p}{\textrm{clip}_p - \textrm{opt}_p} \,. Note here that the random search performance is only 1 at the first batch unlike for :math:`\textrm{norm-med-perf}_{pmt}`. Again we can aggregate this into all objective function performance with (``summary[PERF_MEAN]``): .. math:: \textrm{mean-perf}_{mt} = \textrm{mean}_p \, \textrm{norm-mean-perf}_{pmt} \,, which is a mean-of-means (or *grand mean*), which is much more sensible in general than a median-of-medians.
We can again obtain the property of random search having a constant performance of 1 for all :math:`t` using (``summary[NORMED_MEAN]``): .. math:: \textrm{norm-mean-perf}_{mt} = \frac{\textrm{mean-perf}_{mt}}{\textrm{rand-mean-perf}_{t}} \,, where the random search baseline has been determined with the same sequence of equations as the other methods. These all collapse down to: .. math:: \textrm{rand-mean-perf}_{t} = \textrm{mean}_p \, \frac{\textrm{rand-mean-perf}_{pt} - \textrm{opt}_p}{\textrm{clip}_p - \textrm{opt}_p} \,. Conceptually, we compute this random search baseline (``baseline_ds[PERF_MEAN]``) as: .. math:: \textrm{rand-mean-perf}_{pt} = \textrm{mean}_n \, S'_{pmtn} \,, with :math:`m=` random search. However, because all function evaluations for random search are iid across :math:`t`, we can use a more statistically efficient pooled estimator :func:`.expected_max.expected_min`, which is an unbiased distribution free estimator on the expected minimum of :math:`m` samples from a distribution. Note that :math:`\textrm{norm-mean-perf}_{mt}` is, in aggregate, a linear transformation on the expected loss :math:`S'`. This makes it more justified in a decision theory framework than the median score. However, to view it as a linear transformation we are considering the values in ``baseline_ds`` to be fixed reference loss values and not the output from the experiment. Error bars ---------- The datasets ``agg_result`` and ``summary`` also compute error bars in the form of ``LB_`` and ``UB_`` variables. These error bars do not consider the random variation in the baseline quantities from ``baseline_ds`` like ``opt`` and ``clip``. They are instead treated as fixed constant reference points. Therefore, they are computed by a different command ``bayesmark-baseline``.
The user can generate the baselines when they want, but since they are not considered a random quantity in the statistics they are not automatically generated from the experimental data (unless the baseline file ``derived/baseline.json`` is missing). Additionally, the error bars on the grand mean (``summary[PERF_MEAN]``) are computed by simply using t-statistic based error bars on the individual means. Under a "random effects" model, this does not actually lose any statistical power. However, this is computing the mean on the loss over sampling from new problems under the "same distribution" of benchmark problems. These error bars will be wider than if we computed the error bars on the grand mean over this particular set of benchmark problems. ================================================ FILE: example_opt_root/config.json ================================================ { "Flaky": [ "flaky_optimizer.py", {} ], "HyperOpt-New": [ "hyperopt_optimizer.py", {} ], "Nevergrad-OnePlusOne-New": [ "nevergrad_optimizer.py", { "budget": 300, "tool": "OnePlusOne" } ], "OpenTuner-BanditA-New": [ "opentuner_optimizer.py", { "techniques": [ "AUCBanditMetaTechniqueA" ] } ], "OpenTuner-GA-DE-New": [ "opentuner_optimizer.py", { "techniques": [ "PSO_GA_DE" ] } ], "OpenTuner-GA-New": [ "opentuner_optimizer.py", { "techniques": [ "PSO_GA_Bandit" ] } ], "PySOT-New": [ "pysot_optimizer.py", {} ], "RandomSearch-New": [ "random_optimizer.py", {} ], "Scikit-GBRT-Hedge-New": [ "scikit_optimizer.py", { "acq_func": "gp_hedge", "base_estimator": "GBRT", "n_initial_points": 5 } ], "Scikit-GP-Hedge-New": [ "scikit_optimizer.py", { "acq_func": "gp_hedge", "base_estimator": "GP", "n_initial_points": 5 } ], "Scikit-GP-LCB-New": [ "scikit_optimizer.py", { "acq_func": "LCB", "base_estimator": "GP", "n_initial_points": 5 } ] } ================================================ FILE: example_opt_root/flaky_optimizer.py ================================================ from time import sleep import 
class FlakyOptimizer(AbstractOptimizer):
    """Random-search optimizer that misbehaves on purpose.

    One of the modes "normal", "crash" or "delay" is drawn at construction.
    In the two non-normal modes, `suggest` either fails an assertion or sleeps
    for 15 minutes whenever its per-call draw from `random.rand()` exceeds 0.5.
    """

    def __init__(self, api_config, random=np_util.random):
        """Build the flaky random-search wrapper and pick its failure mode.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        random : object
            Random stream; must provide `choice` and `rand`. It is stored and
            also forwarded to `rs.suggest_dict` by `suggest`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random
        # The mode is fixed for the lifetime of this instance.
        self.mode = self.random.choice(["normal", "crash", "delay"])

    def suggest(self, n_suggestions=1):
        """Get suggestion, possibly after a long delay or not at all.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # The draw happens on every call, whatever the mode is.
        behave = self.random.rand() <= 0.5 or self.mode == "normal"

        if behave or self.mode == "delay":
            if not behave:
                sleep(15 * 60)  # stall for 15 minutes before answering
            x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
        elif self.mode == "crash":
            assert False, "Crashing for testing purposes"
        else:
            assert False, "Crashing, not for testing purposes"
        return x_guess

    def observe(self, X, y):
        """Feed an observation back.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Suggestions come from random search, so observations are ignored.
        return None
""" hashable_object = frozenset(d.items()) return hashable_object @staticmethod def get_hyperopt_dimensions(api_config): """Help routine to setup hyperopt search space in constructor. Take api_config as argument so this can be static. """ # The ordering of iteration prob makes no difference, but just to be # safe and consistnent with space.py, I will make sorted. param_list = sorted(api_config.keys()) space = {} round_to_values = {} for param_name in param_list: param_config = api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_range = param_config.get("range", None) param_values = param_config.get("values", None) # Some setup for case that whitelist of values is provided: values_only_type = param_type in ("cat", "ordinal") if (param_values is not None) and (not values_only_type): assert param_range is None param_values = np.unique(param_values) param_range = (param_values[0], param_values[-1]) round_to_values[param_name] = interp1d( param_values, param_values, kind="nearest", fill_value="extrapolate" ) if param_type == "int": low, high = param_range if param_space in ("log", "logit"): space[param_name] = hp.qloguniform(param_name, np.log(low), np.log(high), 1) else: space[param_name] = hp.quniform(param_name, low, high, 1) elif param_type == "bool": assert param_range is None assert param_values is None space[param_name] = hp.choice(param_name, (False, True)) elif param_type in ("cat", "ordinal"): assert param_range is None space[param_name] = hp.choice(param_name, param_values) elif param_type == "real": low, high = param_range if param_space in ("log", "logit"): space[param_name] = hp.loguniform(param_name, np.log(low), np.log(high)) else: space[param_name] = hp.uniform(param_name, low, high) else: assert False, "type %s not handled in API" % param_type return space, round_to_values def get_trial(self, trial_id): for trial in self.trials._dynamic_trials: if trial["tid"] == trial_id: assert 
isinstance(trial, dict) # Make sure right kind of dict assert "state" in trial and "result" in trial assert trial["state"] == JOB_STATE_NEW return trial assert False, "No matching trial ID" def cleanup_guess(self, x_guess): assert isinstance(x_guess, dict) # Also, check the keys are only the vars we are searching over: assert frozenset(x_guess.keys()) == self.param_set_chk # Do the rounding # Make a copy to be safe, and also unpack singletons # We may also need to consider clip_chk at some point like opentuner x_guess = {k: only(x_guess[k]) for k in x_guess} for param_name, round_f in self.round_to_values.items(): x_guess[param_name] = round_f(x_guess[param_name]) # Also ensure this is correct dtype so sklearn is happy x_guess = {k: DTYPE_MAP[self.api_config[k]["type"]](x_guess[k]) for k in x_guess} return x_guess def _suggest(self): """Helper function to `suggest` that does the work of calling `hyperopt` via its dumb API. """ new_ids = self.trials.new_trial_ids(1) assert len(new_ids) == 1 self.trials.refresh() seed = random_seed(self.random) new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed) assert len(new_trials) == 1 self.trials.insert_trial_docs(new_trials) self.trials.refresh() new_trial, = new_trials # extract singleton return new_trial def suggest(self, n_suggestions=1): """Make `n_suggestions` suggestions for what to evaluate next. This requires the user observe all previous suggestions before calling again. Parameters ---------- n_suggestions : int The number of suggestions to return. Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ assert n_suggestions >= 1, "invalid value for n_suggestions" # Get the new trials, it seems hyperopt either uses random search or # guesses one at a time anyway, so we might as welll call serially. 
new_trials = [self._suggest() for _ in range(n_suggestions)] X = [] for trial in new_trials: x_guess = self.cleanup_guess(trial["misc"]["vals"]) X.append(x_guess) # Build lookup to get original trial object x_guess_ = HyperoptOptimizer.hashable_dict(x_guess) assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict" self.trial_id_lookup[x_guess_] = trial["tid"] assert len(X) == n_suggestions return X def observe(self, X, y): """Feed the observations back to hyperopt. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. y : array-like, shape (n,) Corresponding values where objective has been evaluated. """ assert len(X) == len(y) for x_guess, y_ in zip(X, y): x_guess_ = HyperoptOptimizer.hashable_dict(x_guess) assert x_guess_ in self.trial_id_lookup, "Appears to be guess that did not originate from suggest" trial_id = self.trial_id_lookup.pop(x_guess_) trial = self.get_trial(trial_id) assert self.cleanup_guess(trial["misc"]["vals"]) == x_guess, "trial ID not consistent with x values stored" # Cast to float to ensure native type result = {"loss": float(y_), "status": STATUS_OK} trial["state"] = JOB_STATE_DONE trial["result"] = result # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end # of loop of a bunch of evals, so we will do the same thing here. 
class NevergradOptimizer(AbstractOptimizer):
    """Benchmark wrapper around a nevergrad optimizer taken from its registry."""

    primary_import = "nevergrad"

    def __init__(self, api_config, tool="OnePlusOne", budget=300):
        """Build wrapper class to use nevergrad optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        tool : str
            Key into `optimization.registry` selecting the optimizer class.
        budget : int
            Expected number of max function evals
        """
        AbstractOptimizer.__init__(self, api_config)

        self.instrum, self.space = NevergradOptimizer.get_nvg_dimensions(api_config)

        n_dims = self.instrum.dimension
        optimizer_cls = optimization.registry[tool]
        self.optim = optimizer_cls(dimension=n_dims, budget=budget)

    @staticmethod
    def get_nvg_dimensions(api_config):
        """Translate `api_config` into a nevergrad instrumentation.

        Static so the constructor can call it before the instance is set up.

        Returns
        -------
        instrum : inst.Instrumentation
            Instrumentation built from one variable per parameter.
        all_prewarp : dict
            Maps each parameter name to a `Real` space object when the
            parameter is a continuous "real" (and so needs warping to and
            from the Gaussian variable), or to None otherwise.
        """
        all_args = {}
        all_prewarp = {}

        # Iteration order should not matter, but sorting keeps it
        # deterministic and consistent with space.py.
        for name in sorted(api_config.keys()):
            cfg = api_config[name]
            ptype = cfg["type"]
            warp = cfg.get("space", None)
            bounds = cfg.get("range", None)
            values = cfg.get("values", None)

            prewarp = None
            if ptype == "cat":
                assert warp is None
                assert bounds is None
                arg = inst.var.SoftmaxCategorical(values)
            elif ptype == "bool":
                assert warp is None
                assert bounds is None
                assert values is None
                arg = inst.var.OrderedDiscrete([False, True])
            elif values is not None:
                # A whitelist of values: the warping information is thrown
                # away here, but OrderedDiscrete appears to be invariant to
                # monotonic transformation anyway.
                assert ptype in ("int", "ordinal", "real")
                arg = inst.var.OrderedDiscrete(values)
            elif ptype == "int":
                assert values is None
                # +1 because the API range is inclusive at both ends. As with
                # the whitelist case, warping information is dropped.
                choices = range(int(bounds[0]), int(bounds[-1]) + 1)
                arg = inst.var.OrderedDiscrete(choices)
            elif ptype == "real":
                assert values is None
                assert bounds is not None
                # The optimizer sees a standard Gaussian variable; mapping to
                # and from the real range is done separately via `prewarp`.
                arg = inst.var.Gaussian(mean=0, std=1)
                prewarp = Real(warp=warp, range_=bounds)
            else:
                assert False, "type %s not handled in API" % ptype

            all_args[name] = arg
            all_prewarp[name] = prewarp

        instrum = inst.Instrumentation(**all_args)
        return instrum, all_prewarp

    def prewarp(self, xx):
        """Map a point from the original space to the Gaussian representation.

        Entries whose space object is None are passed through unchanged.
        """
        warped = {}
        for name, value in xx.items():
            assert np.isscalar(value)
            warper = self.space[name]

            if warper is not None:
                # Apply the space's own warping first.
                value = warper.warp(value)
                assert value.size == 1

                # Rescale the warped value from its bounds onto [0, 1],
                # unpacking it to a scalar, then push it through the standard
                # normal quantile function.
                [(lb, ub)] = warper.get_bounds()
                value = norm.ppf(linear_rescale(value.item(), lb, ub, 0, 1))

            assert np.isscalar(value)
            warped[name] = value
        return warped

    def postwarp(self, xxw):
        """Map a point from the Gaussian representation back to the original
        space, reversing the steps of `prewarp` in the opposite order.

        Entries whose space object is None are passed through unchanged.
        """
        unwarped = {}
        for name, value in xxw.items():
            assert np.isscalar(value)
            warper = self.space[name]

            if warper is not None:
                # Standard normal CDF takes the Gaussian value onto [0, 1].
                unit = norm.cdf(value)

                # Rescale from [0, 1] onto the warped bounds, then undo the
                # space's own warping.
                [(lb, ub)] = warper.get_bounds()
                value = warper.unwarp([linear_rescale(unit, 0, 1, lb, ub)])

            assert np.isscalar(value)
            unwarped[name] = value
        return unwarped

    def suggest(self, n_suggestions=1):
        """Get suggestion from nevergrad.

        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # Ask for every candidate first; conversion happens afterwards.
        asked = [self.optim.ask() for _ in range(n_suggestions)]

        next_guess = []
        for candidate in asked:
            positional, keyword = self.instrum.data_to_arguments(candidate)
            # All variables were registered by keyword.
            assert positional == ()
            next_guess.append(self.postwarp(keyword))
        return next_guess

    def observe(self, X, y):
        """Feed an observation back to nevergrad.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        for point, value in zip(X, y):
            gaussian_point = self.prewarp(point)
            data = self.instrum.arguments_to_data(**gaussian_point)
            self.optim.tell(data, value)
def ClippedParam(cls, epsilon=1e-5):
    """Wrap an opentuner parameter class so unscaled values are clip-checked.

    The returned subclass overrides `_unscale` and passes its result through
    `clip_chk` with the parameter's `min_value`/`max_value`, so values stay
    inside the allowed range despite numerical errors.

    Parameters
    ----------
    cls : ScaledNumericParameter
        Opentuner parameter class, such as `LogFloatParameter` or
        `FloatParameter`, which transforms the domain of parameter.
    epsilon : float
        Accepted for interface compatibility; the body never reads it.

    Returns
    -------
    StableClass : ScaledNumericParameter
        Subclass of `cls` whose `_unscale` result is clip-checked against the
        parameter bounds.
    """
    assert issubclass(
        cls, ScaledNumericParameter
    ), "this class cls should inherit from the ScaledNumericParameter class"

    class StableClass(cls):
        def _unscale(self, v):
            # Let the wrapped class do the transform, then enforce the bounds
            raw_value = super()._unscale(v)
            return clip_chk(raw_value, self.min_value, self.max_value)

    return StableClass
@staticmethod
def build_manipulator(api_config):
    """Translate the benchmark API config into an opentuner manipulator.

    Parameters
    ----------
    api_config : dict-like of dict-like
        Configuration of the optimization variables. See API description.

    Returns
    -------
    manipulator : ConfigurationManipulator
        Manipulator with one opentuner parameter added per entry of
        `api_config`.
    """
    linear_like = ("linear", "logit")
    log_like = ("log", "bilog")

    manipulator = ConfigurationManipulator()
    for pname in api_config:
        pconf = api_config[pname]
        ptype = pconf["type"]
        pspace = pconf.get("space", None)
        pmin, pmax = pconf.get("range", (None, None))

        if ptype == "real":
            if pspace in linear_like:
                ot_param = FloatParameter(pname, pmin, pmax)
            elif pspace in log_like:
                # Log-scaled floats go through the clip-checked wrapper class
                ot_param = ClippedParam(LogFloatParameter)(pname, pmin, pmax)
            else:
                assert False, "unsupported param space = %s" % pspace
        elif ptype == "int":
            if pspace in linear_like:
                ot_param = IntegerParameter(pname, pmin, pmax)
            elif pspace in log_like:
                ot_param = LogIntegerParameter(pname, pmin, pmax)
            else:
                assert False, "unsupported param space = %s" % pspace
        elif ptype == "bool":
            # The actual bool parameter seems not to work in Py3, so use a
            # 0/1 integer instead.
            ot_param = IntegerParameter(pname, 0, 1)
        elif ptype in ("cat", "ordinal"):
            # Ordinal and categorical variables are treated the same for now.
            assert "values" in pconf
            ot_param = EnumParameter(pname, pconf["values"])
        else:
            assert False, "type=%s/space=%s not handled in opentuner yet" % (ptype, pspace)
        manipulator.add_parameter(ot_param)
    return manipulator
def observe(self, X, y):
    """Report evaluated suggestions back to opentuner.

    Parameters
    ----------
    X : list of dict-like
        Places where the objective function has already been evaluated.
        Each suggestion is a dictionary where each key corresponds to a
        parameter being optimized.
    y : array-like, shape (n,)
        Corresponding values where objective has been evaluated.
    """
    assert len(X) == len(y)

    for point, value in zip(X, y):
        key = OpentunerOptimizer.hashable_dict(point)

        if key in self.x_to_dr:
            # Take the pending DesiredResult out of the bookkeeping dict.
            pending = self.x_to_dr.pop(key, None)
            # This will also catch None from opentuner.
            assert isinstance(pending, DesiredResult), "DesiredResult object not available in x_to_dr"
            # Opentuner's arg names assume we are minimizing execution time,
            # so to minimize y we pretend it is a 'time'.
            self.api.report_result(pending, Result(time=value))
        else:
            # No DesiredResult on record: only the dummy guess is acceptable.
            assert point == self.dummy_suggest, "Appears to be guess that did not originate from suggest"
""" AbstractOptimizer.__init__(self, api_config) self.space_x = JointSpace(api_config) self.bounds = self.space_x.get_bounds() self.create_opt_prob() # Sets up the optimization problem (needs self.bounds) self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.batch_size = None self.history = [] self.proposals = [] def create_opt_prob(self): """Create an optimization problem object.""" opt = OptimizationProblem() opt.lb = self.bounds[:, 0] # In warped space opt.ub = self.bounds[:, 1] # In warped space opt.dim = len(self.bounds) opt.cont_var = np.arange(len(self.bounds)) opt.int_var = [] assert len(opt.cont_var) + len(opt.int_var) == opt.dim opt.objfun = None self.opt = opt def start(self, max_evals): """Starts a new pySOT run.""" self.history = [] self.proposals = [] # Symmetric Latin hypercube design des_pts = max([self.batch_size, 2 * (self.opt.dim + 1)]) slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts) # Warped RBF interpolant rbf = RBFInterpolant( dim=self.opt.dim, lb=self.opt.lb, ub=self.opt.ub, kernel=CubicKernel(), tail=LinearTail(self.opt.dim), eta=1e-4, ) # Optimization strategy self.strategy = SRBFStrategy( max_evals=self.max_evals, opt_prob=self.opt, exp_design=slhd, surrogate=rbf, asynchronous=True, batch_size=1, use_restarts=True, ) def suggest(self, n_suggestions=1): """Get a suggestion from the optimizer. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. 
""" if self.batch_size is None: # First call to suggest self.batch_size = n_suggestions self.start(self.max_evals) # Set the tolerances pretending like we are running batch d, p = float(self.opt.dim), float(n_suggestions) self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p))) # Now we can make suggestions x_w = [] self.proposals = [] for _ in range(n_suggestions): proposal = self.strategy.propose_action() record = EvalRecord(proposal.args, status="pending") proposal.record = record proposal.accept() # This triggers all the callbacks # It is possible that pySOT proposes a previously evaluated point # when all variables are integers, so we just abort in this case # since we have likely converged anyway. See PySOT issue #30. x = list(proposal.record.params) # From tuple to list x_unwarped, = self.space_x.unwarp(x) if x_unwarped in self.history: warnings.warn("pySOT proposed the same point twice") self.start(self.max_evals) return self.suggest(n_suggestions=n_suggestions) # NOTE: Append unwarped to avoid rounding issues self.history.append(copy(x_unwarped)) self.proposals.append(proposal) x_w.append(copy(x_unwarped)) return x_w def _observe(self, x, y): # Find the matching proposal and execute its callbacks idx = [x == xx for xx in self.history] i = np.argwhere(idx)[0].item() # Pick the first index if there are ties proposal = self.proposals[i] proposal.record.complete(y) self.proposals.pop(i) self.history.pop(i) def observe(self, X, y): """Send an observation of a suggestion back to the optimizer. Parameters ---------- X : list of dict-like Places where the objective function has already been evaluated. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. 
def __init__(self, api_config, random=np_util.random):
    """Build wrapper class to use random search function in benchmark.

    Parameters
    ----------
    api_config : dict-like of dict-like
        Configuration of the optimization variables. See API description.
    random : object
        Random source stored on the instance and forwarded as the `random`
        argument of `rs.suggest_dict` by `suggest`. Defaults to
        `np_util.random`.
        NOTE(review): presumably a numpy `RandomState`-like stream -- confirm
        against `bayesmark.random_search`.
    """
    AbstractOptimizer.__init__(self, api_config)
    # Kept so every `suggest` call draws from the same random source
    self.random = random
def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge", n_initial_points=5):
    """Build wrapper class to use the scikit-optimize optimizer in benchmark.

    Parameters
    ----------
    api_config : dict-like of dict-like
        Configuration of the optimization variables. See API description.
    base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
        How to estimate the objective function.
    acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
        Acquisition objective to decide next suggestion.
    n_initial_points : int
        Number of points to sample randomly before actual Bayes opt.
    """
    AbstractOptimizer.__init__(self, api_config)

    sk_dims, rounders = ScikitOptimizer.get_sk_dimensions(api_config)
    self.round_to_values = rounders

    # Older versions of skopt don't copy over the dimension names during
    # normalization, so the names are missing in self.skopt.space.dimensions.
    # We therefore keep our own tuple of names. If we can commit to newer
    # versions of skopt, self.dimensions_list can be deleted.
    self.dimensions_list = tuple(dim.name for dim in sk_dims)

    optimizer_settings = dict(
        n_initial_points=n_initial_points,
        base_estimator=base_estimator,
        acq_func=acq_func,
        acq_optimizer="auto",
        acq_func_kwargs={},
        acq_optimizer_kwargs={},
    )
    self.skopt = SkOpt(sk_dims, **optimizer_settings)
def suggest(self, n_suggestions=1):
    """Ask skopt for the next points and return them as dictionaries.

    Parameters
    ----------
    n_suggestions : int
        Desired number of parallel suggestions in the output

    Returns
    -------
    next_guess : list of dict
        List of `n_suggestions` suggestions to evaluate the objective
        function. Each suggestion is a dictionary where each key
        corresponds to a parameter being optimized.
    """
    # skopt.ask() hands back a list of lists, one inner list per point
    raw_points = self.skopt.ask(n_points=n_suggestions)

    # Pair every value with its dimension name
    names = self.dimensions_list
    guesses = [dict(zip(names, point)) for point in raw_points]

    # Custom rounding is not supported in skopt, so apply it here. Only
    # dimensions that have a rounding function registered are touched.
    for name, snap in self.round_to_values.items():
        for guess in guesses:
            guess[name] = snap(guess[name])
    return guesses
#!/bin/bash
# Integration test: runs the bayesmark command line tools end to end, first
# with the built-in optimizers and then with the ones in ./example_opt_root,
# exporting the plotting notebooks after each pass.

# -e: stop at the first failing command, -x: echo each command as it runs
set -ex
# A pipeline fails if any command in it fails
set -o pipefail

# Be able to check if using version out of tar ball
which bayesmark-launch
which bayesmark-exp
which bayesmark-agg
which bayesmark-anal

# Results are written under the notebooks folder, which the notebooks read
DB_ROOT=./notebooks
DBID=bo_example_folder

# Launch, aggregate and analyze with the built-in optimizers
bayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -d boston breast -v
bayesmark-agg -dir $DB_ROOT -b $DBID
bayesmark-anal -dir $DB_ROOT -b $DBID -v

# Try ipynb export
python -m ipykernel install --name=bobm_ipynb --user
jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1
jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1

# Try dry run
bayesmark-launch -n 15 -r 3 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT OpenTuner-BanditA -c SVM DT -nj 50 -v

# Try again but use the custom optimizers
# Move the first pass out of the way so the same DBID can be reused
mv $DB_ROOT/$DBID old
bayesmark-launch -n 15 -r 1 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -d boston breast -v
bayesmark-agg -dir $DB_ROOT -b $DBID
bayesmark-anal -dir $DB_ROOT -b $DBID -v

# Export again
jupyter nbconvert --to html --execute notebooks/plot_mean_score.ipynb --ExecutePreprocessor.timeout=-1
jupyter nbconvert --to html --execute notebooks/plot_test_case.ipynb --ExecutePreprocessor.timeout=-1

# Try dry run
bayesmark-launch -n 15 -r 2 -dir $DB_ROOT -b $DBID -o RandomSearch PySOT-New OpenTuner-BanditA-New -c SVM DT --opt-root ./example_opt_root -nj 50 -v

echo "success"
integration_test_with_setup.sh ================================================ #!/bin/bash set -ex set -o pipefail export PIP_REQUIRE_VIRTUALENV=false # Handy to know what we are working with git --version python --version pip freeze | sort # Cleanup workspace, src for any old -e installs git clean -x -f -d rm -rf src/ # See if opentuner will work in env (but this command does not work on Mac) # dpkg -l | grep libsqlite # Simulate deployment with wheel ./build_wheel.sh mv -v dist/bayesmark-* dist/bayesmark.tar.gz # Install and run local optimizers mkdir install_test cp -r ./notebooks install_test cp -r ./example_opt_root install_test cd install_test virtualenv bobm_ipynb --python=python3 source ./bobm_ipynb/bin/activate python --version pip freeze | sort # Remove this if we want to make sure everything is compatible with latest # pip install -r ../requirements/optimizers.txt pip install ../dist/bayesmark.tar.gz[optimizers,notebooks] ../integration_test.sh # wrap up deactivate cd .. echo "success with setup wrapper too" ================================================ FILE: notebooks/dummy.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import ipykernel import jupyter import jupyter_core import nbconvert # import extra deps and use it to keep pipreqs and flake8 happy for pkg in (ipykernel, jupyter, jupyter_core, nbconvert): print("%s %s" % (pkg.__name__, pkg.__version__)) ================================================ FILE: notebooks/plot_mean_score.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from matplotlib import cm, colors, rcParams\n", "\n", "import bayesmark.constants as cc\n", "import bayesmark.xr_util as xru\n", "from bayesmark.serialize import XRSerializer\n", "from bayesmark.constants import ITER, METHOD, ARG_DELIM, OBJECTIVE, VISIBLE_TO_OPT\n", "from bayesmark.path_util import abspath\n", "from bayesmark.util import preimage_func" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# User settings, must specify location of the data to make plots here for this to run\n", "DB_ROOT = abspath(\".\")\n", "DBID = \"bo_example_folder\"\n", "metric_for_scoring = VISIBLE_TO_OPT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Matplotlib setup\n", "# Note this will put type-3 font BS in the pdfs, if it matters\n", "rcParams[\"mathtext.fontset\"] = \"stix\"\n", "rcParams[\"font.family\"] = \"STIXGeneral\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def build_color_dict(names):\n", " \"\"\"Make a color dictionary to give each name a mpl color.\n", " \"\"\"\n", " norm = colors.Normalize(vmin=0, vmax=1)\n", " m = cm.ScalarMappable(norm, cm.tab20)\n", " color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\n", " color_dict = dict(zip(names, color_dict))\n", " return color_dict" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load 
the data\n", "summary_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.MEAN_SCORE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "method_to_rgba = build_color_dict(summary_ds.coords[METHOD].values.tolist())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Group methods by the package behind them\n", "method_only = lambda method_rev: method_rev.split(ARG_DELIM, 1)[0]\n", "groups = preimage_func(method_only, summary_ds.coords[METHOD].values)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make a plot for each package\n", "for method_name in groups:\n", " plt.figure(figsize=(5, 5), dpi=300)\n", " for method_ver_name in groups[method_name]:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MED].values,\n", " curr_ds[cc.UB_MED].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MED].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_name,\n", " marker=\".\",\n", " )\n", " plt.xlabel(\"evaluation\", fontsize=10)\n", " plt.ylabel(\"normalized median score\", fontsize=10)\n", " plt.title(method_name)\n", " plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", " plt.grid()\n", "\n", " plt.figure(figsize=(5, 5), dpi=300)\n", " for method_ver_name in groups[method_name]:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MEAN].values,\n", " curr_ds[cc.UB_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " 
plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_name,\n", " marker=\".\",\n", " )\n", " plt.xlabel(\"evaluation\", fontsize=10)\n", " plt.ylabel(\"mean score\", fontsize=10)\n", " plt.title(method_name)\n", " plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", " plt.grid()\n", "\n", " plt.figure(figsize=(5, 5), dpi=300)\n", " for method_ver_name in groups[method_name]:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_NORMED_MEAN].values,\n", " curr_ds[cc.UB_NORMED_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.NORMED_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_name,\n", " marker=\".\",\n", " )\n", " plt.xlabel(\"evaluation\", fontsize=10)\n", " plt.ylabel(\"normalized mean score\", fontsize=10)\n", " plt.title(method_name)\n", " plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", " plt.grid()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make the summary plot\n", "plt.figure(figsize=(5, 5), dpi=300)\n", "for method_ver_name in summary_ds.coords[METHOD].values:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MED].values,\n", " curr_ds[cc.UB_MED].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MED].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_ver_name,\n", " 
marker=\".\",\n", " )\n", "plt.xlabel(\"evaluation\", fontsize=10)\n", "plt.ylabel(\"normalized median score\", fontsize=10)\n", "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", "plt.grid()\n", "\n", "plt.figure(figsize=(5, 5), dpi=300)\n", "for method_ver_name in summary_ds.coords[METHOD].values:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MEAN].values,\n", " curr_ds[cc.UB_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_ver_name,\n", " marker=\".\",\n", " )\n", "plt.xlabel(\"evaluation\", fontsize=10)\n", "plt.ylabel(\"mean score\", fontsize=10)\n", "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", "plt.grid()\n", "\n", "plt.figure(figsize=(5, 5), dpi=300)\n", "for method_ver_name in summary_ds.coords[METHOD].values:\n", " curr_ds = summary_ds.sel({METHOD: method_ver_name, OBJECTIVE: metric_for_scoring})\n", " curr_ds.coords[ITER].values\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_NORMED_MEAN].values,\n", " curr_ds[cc.UB_NORMED_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.NORMED_MEAN].values,\n", " color=method_to_rgba[method_ver_name],\n", " label=method_ver_name,\n", " marker=\".\",\n", " )\n", "plt.xlabel(\"evaluation\", fontsize=10)\n", "plt.ylabel(\"normalized mean score\", fontsize=10)\n", "plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", "plt.grid()" ] } ], "metadata": { "kernelspec": { "display_name": "bobm_ipynb", "language": "python", 
"name": "bobm_ipynb" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/plot_test_case.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "from matplotlib import cm, colors, rcParams\n", "\n", "import numpy as np\n", "\n", "import bayesmark.constants as cc\n", "from bayesmark.path_util import abspath\n", "from bayesmark.serialize import XRSerializer\n", "from bayesmark.constants import ITER, METHOD, TEST_CASE, OBJECTIVE, VISIBLE_TO_OPT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# User settings, must specify location of the data to make plots here for this to run\n", "DB_ROOT = abspath(\".\")\n", "DBID = \"bo_example_folder\"\n", "metric_for_scoring = VISIBLE_TO_OPT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Matplotlib setup\n", "# Note this will put type-3 font BS in the pdfs, if it matters\n", "rcParams[\"mathtext.fontset\"] = \"stix\"\n", "rcParams[\"font.family\"] = \"STIXGeneral\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def build_color_dict(names):\n", " \"\"\"Make a color dictionary to give each name a mpl color.\n", " \"\"\"\n", " norm = colors.Normalize(vmin=0, vmax=1)\n", " m = cm.ScalarMappable(norm, cm.tab20)\n", " color_dict = m.to_rgba(np.linspace(0, 1, len(names)))\n", " color_dict = dict(zip(names, color_dict))\n", " return color_dict" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the data\n", 
"agg_results_ds, meta = XRSerializer.load_derived(DB_ROOT, db=DBID, key=cc.PERF_RESULTS)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Setup for plotting\n", "method_list = agg_results_ds.coords[METHOD].values\n", "method_to_rgba = build_color_dict(method_list.tolist())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Make the plots for inidividual test functions\n", "for func_name in agg_results_ds.coords[TEST_CASE].values:\n", " plt.figure(figsize=(5, 5), dpi=300)\n", " for method_name in method_list:\n", " curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MED].values,\n", " curr_ds[cc.UB_MED].values,\n", " color=method_to_rgba[method_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MED].values,\n", " color=method_to_rgba[method_name],\n", " label=method_name,\n", " marker=\".\",\n", " )\n", " plt.xlabel(\"evaluation\", fontsize=10)\n", " plt.ylabel(\"median score\", fontsize=10)\n", " plt.title(func_name)\n", " plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", " plt.grid()\n", "\n", " plt.figure(figsize=(5, 5), dpi=300)\n", " for method_name in method_list:\n", " curr_ds = agg_results_ds.sel({TEST_CASE: func_name, METHOD: method_name, OBJECTIVE: metric_for_scoring})\n", "\n", " plt.fill_between(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.LB_MEAN].values,\n", " curr_ds[cc.UB_MEAN].values,\n", " color=method_to_rgba[method_name],\n", " alpha=0.5,\n", " )\n", " plt.plot(\n", " curr_ds.coords[ITER].values,\n", " curr_ds[cc.PERF_MEAN].values,\n", " color=method_to_rgba[method_name],\n", " label=method_name,\n", " marker=\".\",\n", " )\n", " plt.xlabel(\"evaluation\", fontsize=10)\n", " plt.ylabel(\"mean score\", 
fontsize=10)\n", " plt.title(func_name)\n", " plt.legend(fontsize=8, bbox_to_anchor=(1.05, 1), loc=\"upper left\", borderaxespad=0.0)\n", " plt.grid()" ] } ], "metadata": { "kernelspec": { "display_name": "bobm_ipynb", "language": "python", "name": "bobm_ipynb" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: requirements/base.in ================================================ scipy==1.2.0 pandas==0.24.0 pathvalidate==0.29.0 numpy==1.16.1 GitPython==2.1.11 importlib-metadata==0.18 scikit-learn==0.20.2 xarray==0.12.2 ================================================ FILE: requirements/base.txt ================================================ # SHA1:7ebe4df9e60f001b676e74ae561d5dc3202c3dd0 # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # gitdb2==2.0.5 # via gitpython gitpython==2.1.11 # via -r requirements/base.in importlib-metadata==0.18 # via -r requirements/base.in numpy==1.16.1 # via -r requirements/base.in, pandas, scikit-learn, scipy, xarray pandas==0.24.0 # via -r requirements/base.in, xarray pathvalidate==0.29.0 # via -r requirements/base.in python-dateutil==2.8.0 # via pandas pytz==2019.1 # via pandas scikit-learn==0.20.2 # via -r requirements/base.in scipy==1.2.0 # via -r requirements/base.in, scikit-learn six==1.12.0 # via python-dateutil smmap2==2.0.5 # via gitdb2 xarray==0.12.2 # via -r requirements/base.in zipp==0.5.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools ================================================ FILE: requirements/docs.in ================================================ -r base.in Sphinx==2.1.2 ================================================ FILE: 
requirements/docs.txt ================================================ # SHA1:cde26afc07f6c9c1c6cb169e125fc5142a0c59ae # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # -r base.txt alabaster==0.7.12 # via sphinx attrs==19.1.0 # via packaging babel==2.7.0 # via sphinx certifi==2019.6.16 # via requests chardet==3.0.4 # via requests docutils==0.15 # via sphinx idna==2.8 # via requests imagesize==1.1.0 # via sphinx jinja2==2.10.1 # via sphinx markupsafe==1.1.1 # via jinja2 packaging==19.1 # via sphinx pygments==2.4.2 # via sphinx pyparsing==2.4.2 # via packaging requests==2.22.0 # via sphinx snowballstemmer==1.9.0 # via sphinx sphinx==2.1.2 # via -r requirements/docs.in sphinxcontrib-applehelp==1.0.1 # via sphinx sphinxcontrib-devhelp==1.0.1 # via sphinx sphinxcontrib-htmlhelp==1.0.2 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-qthelp==1.0.2 # via sphinx sphinxcontrib-serializinghtml==1.1.3 # via sphinx urllib3==1.25.3 # via requests # The following packages are considered to be unsafe in a requirements file: # setuptools ================================================ FILE: requirements/ipynb.in ================================================ -r base.in ipykernel==5.1.1 nbconvert==5.6.0 jupyter==1.0.0 jupyter-core==4.6.0 matplotlib==3.1.1 numpy==1.16.1 ================================================ FILE: requirements/ipynb.txt ================================================ # SHA1:6c16d140e48d7e7fa0e157c053953db7d76f0caf # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # -r base.txt appnope==0.1.0 # via ipython attrs==19.1.0 # via jsonschema backcall==0.1.0 # via ipython bleach==3.1.0 # via nbconvert cycler==0.10.0 # via matplotlib decorator==4.4.0 # via ipython, traitlets defusedxml==0.6.0 # via nbconvert entrypoints==0.3 # via nbconvert ipykernel==5.1.1 # via -r requirements/ipynb.in, ipywidgets, jupyter, jupyter-console, notebook, qtconsole 
ipython-genutils==0.2.0 # via nbformat, notebook, qtconsole, traitlets ipython==7.6.1 # via ipykernel, ipywidgets, jupyter-console ipywidgets==7.5.1 # via jupyter jedi==0.14.1 # via ipython jinja2==2.10.1 # via nbconvert, notebook jsonschema==3.0.2 # via nbformat jupyter-client==5.3.1 # via ipykernel, jupyter-console, notebook, qtconsole jupyter-console==6.0.0 # via jupyter jupyter-core==4.6.0 # via -r requirements/ipynb.in, jupyter-client, nbconvert, nbformat, notebook, qtconsole jupyter==1.0.0 # via -r requirements/ipynb.in kiwisolver==1.1.0 # via matplotlib markupsafe==1.1.1 # via jinja2 matplotlib==3.1.1 # via -r requirements/ipynb.in mistune==0.8.4 # via nbconvert nbconvert==5.6.0 # via -r requirements/ipynb.in, jupyter, notebook nbformat==4.4.0 # via ipywidgets, nbconvert, notebook notebook==6.0.1 # via jupyter, widgetsnbextension pandocfilters==1.4.2 # via nbconvert parso==0.5.1 # via jedi pexpect==4.7.0 # via ipython pickleshare==0.7.5 # via ipython prometheus-client==0.7.1 # via notebook prompt-toolkit==2.0.9 # via ipython, jupyter-console ptyprocess==0.6.0 # via pexpect, terminado pygments==2.4.2 # via ipython, jupyter-console, nbconvert, qtconsole pyparsing==2.4.2 # via matplotlib pyrsistent==0.15.4 # via jsonschema pyzmq==18.0.2 # via jupyter-client, notebook qtconsole==4.5.5 # via jupyter send2trash==1.5.0 # via notebook terminado==0.8.2 # via notebook testpath==0.4.2 # via nbconvert tornado==6.0.3 # via ipykernel, jupyter-client, notebook, terminado traitlets==4.3.2 # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook, qtconsole wcwidth==0.1.7 # via prompt-toolkit webencodings==0.5.1 # via bleach widgetsnbextension==3.5.1 # via ipywidgets # The following packages are considered to be unsafe in a requirements file: # setuptools ================================================ FILE: requirements/optimizers.in ================================================ -r base.in opentuner==0.8.2 numpy==1.16.1 
scipy==1.2.0 nevergrad==0.1.4 hyperopt==0.1.1 POAP==0.1.26 scikit-optimize==0.5.2 pySOT==0.3.3 ================================================ FILE: requirements/optimizers.txt ================================================ # SHA1:08174a35f9973427450f549131b4438e2f116a88 # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # -r base.txt argparse==1.4.0 # via opentuner atomicwrites==1.3.0 # via pytest attrs==19.1.0 # via packaging, pytest bayesian-optimization==0.6.0 # via nevergrad certifi==2019.6.16 # via requests chardet==3.0.4 # via requests cma==2.7.0 # via nevergrad coverage==4.5.4 # via nevergrad cycler==0.10.0 # via matplotlib decorator==4.4.0 # via networkx dill==0.3.0 # via pysot fn==0.4.3 # via opentuner future==0.17.1 # via hyperopt, opentuner genty==1.3.2 # via nevergrad hyperopt==0.1.1 # via -r requirements/optimizers.in idna==2.8 # via requests joblib==0.13.2 # via nevergrad kiwisolver==1.1.0 # via matplotlib matplotlib==3.1.1 # via nevergrad more-itertools==7.2.0 # via pytest mypy-extensions==0.4.1 # via mypy mypy==0.720 # via nevergrad networkx==2.3 # via hyperopt nevergrad==0.1.4 # via -r requirements/optimizers.in nose-timer==0.7.5 # via nevergrad nose==1.3.7 # via nevergrad, nose-timer opentuner==0.8.2 # via -r requirements/optimizers.in packaging==19.1 # via pytest pluggy==0.12.0 # via pytest poap==0.1.26 # via -r requirements/optimizers.in, pysot py==1.8.0 # via pytest pydoe2==1.2.0 # via pysot pymongo==3.8.0 # via hyperopt pyparsing==2.4.2 # via matplotlib, packaging pysot==0.3.3 # via -r requirements/optimizers.in pytest==5.0.1 # via pysot requests==2.22.0 # via nevergrad scikit-optimize==0.5.2 # via -r requirements/optimizers.in sqlalchemy==1.3.8 # via opentuner typed-ast==1.4.0 # via mypy typing-extensions==3.7.4 # via mypy, nevergrad urllib3==1.25.3 # via requests wcwidth==0.1.7 # via pytest xlrd==1.2.0 # via nevergrad xlwt==1.3.0 # via nevergrad # The following packages are considered to be unsafe 
in a requirements file: # setuptools ================================================ FILE: requirements/pipreqs_edits.sed ================================================ /argparse/d /appnope/d /certifi/d /bayesmark/d ================================================ FILE: requirements/self.txt ================================================ bayesmark==0.0.8 ================================================ FILE: requirements/test.in ================================================ -r base.in -r optimizers.in hypothesis==4.32.3 hypothesis-gufunc==0.0.5rc2 numpy==1.16.1 pathvalidate==0.29.0 scipy==1.2.0 scikit-learn==0.20.2 xarray==0.12.2 pytest==5.0.1 pytest-cov==2.7.1 ================================================ FILE: requirements/test.txt ================================================ # SHA1:0dd8b5c26e6671e320706ddd399f6f62e19f3189 # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # -r base.txt -r optimizers.txt hypothesis-gufunc==0.0.5rc2 # via -r requirements/test.in hypothesis==4.32.3 # via -r requirements/test.in, hypothesis-gufunc pytest-cov==2.7.1 # via -r requirements/test.in # The following packages are considered to be unsafe in a requirements file: # setuptools ================================================ FILE: requirements/tools.in ================================================ detect-secrets==0.12.5 ipykernel==5.1.1 nbconvert==5.6.0 pip-compile-multi==1.4.0 pipreqs==0.4.9 pre-commit==1.15.2 pytest==5.0.1 ================================================ FILE: requirements/tools.txt ================================================ # SHA1:08f4ed4790290aab315dd20169793be4f0a974af # # This file is autogenerated by pip-compile-multi # To update, run: # # pip-compile-multi # appnope==0.1.0 # via ipython aspy.yaml==1.3.0 # via pre-commit atomicwrites==1.3.0 # via pytest attrs==19.1.0 # via jsonschema, packaging, pytest backcall==0.1.0 # via ipython bleach==3.1.0 # via nbconvert certifi==2019.6.16 # via 
requests cfgv==2.0.1 # via pre-commit chardet==3.0.4 # via requests click==7.0 # via pip-compile-multi, pip-tools decorator==4.4.0 # via ipython, traitlets defusedxml==0.6.0 # via nbconvert detect-secrets==0.12.5 # via -r requirements/tools.in docopt==0.6.2 # via pipreqs entrypoints==0.3 # via nbconvert identify==1.4.5 # via pre-commit idna==2.8 # via requests importlib-metadata==0.18 # via importlib-resources, pluggy, pre-commit, pytest importlib-resources==2.0.1 # via pre-commit ipykernel==5.1.1 # via -r requirements/tools.in ipython-genutils==0.2.0 # via nbformat, traitlets ipython==7.6.1 # via ipykernel jedi==0.14.1 # via ipython jinja2==2.10.1 # via nbconvert jsonschema==3.0.2 # via nbformat jupyter-client==5.3.1 # via ipykernel jupyter-core==4.6.0 # via jupyter-client, nbconvert, nbformat markupsafe==1.1.1 # via jinja2 mistune==0.8.4 # via nbconvert more-itertools==7.2.0 # via pytest nbconvert==5.6.0 # via -r requirements/tools.in nbformat==4.4.0 # via nbconvert nodeenv==1.3.3 # via pre-commit packaging==19.1 # via pytest pandocfilters==1.4.2 # via nbconvert parso==0.5.1 # via jedi pexpect==4.7.0 # via ipython pickleshare==0.7.5 # via ipython pip-compile-multi==1.4.0 # via -r requirements/tools.in pip-tools==5.0.0 # via pip-compile-multi pipreqs==0.4.9 # via -r requirements/tools.in pluggy==0.12.0 # via pytest pre-commit==1.15.2 # via -r requirements/tools.in prompt-toolkit==2.0.9 # via ipython ptyprocess==0.6.0 # via pexpect py==1.8.0 # via pytest pygments==2.4.2 # via ipython, nbconvert pyparsing==2.4.2 # via packaging pyrsistent==0.15.4 # via jsonschema pytest==5.0.1 # via -r requirements/tools.in python-dateutil==2.8.0 # via jupyter-client pyyaml==5.1.1 # via aspy.yaml, detect-secrets, pre-commit pyzmq==18.0.2 # via jupyter-client requests==2.22.0 # via detect-secrets, yarg six==1.12.0 # via bleach, cfgv, jsonschema, packaging, pip-tools, pre-commit, prompt-toolkit, python-dateutil, traitlets testpath==0.4.2 # via nbconvert toml==0.10.0 # via pre-commit 
toposort==1.5 # via pip-compile-multi tornado==6.0.3 # via ipykernel, jupyter-client traitlets==4.3.2 # via ipykernel, ipython, jupyter-client, jupyter-core, nbconvert, nbformat urllib3==1.25.3 # via requests virtualenv==16.7.2 # via pre-commit wcwidth==0.1.7 # via prompt-toolkit, pytest webencodings==0.5.1 # via bleach yarg==0.1.9 # via pipreqs zipp==0.5.2 # via importlib-metadata, importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip # setuptools ================================================ FILE: setup.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from setuptools import find_packages, setup CMD_NAME = "bayesmark" # Strings to remove from README to make it PyPI friendly. 
def read_requirements(name):
    """Read the first-order requirements listed in ``requirements/<name>.in``.

    The path is resolved relative to the current working directory. Strict
    ``==`` pins are loosened to ``>=`` so the package can be installed next to
    newer versions of its dependencies.

    Parameters
    ----------
    name : str
        Base name of the requirements file, without directory or ``.in`` suffix.

    Returns
    -------
    requirements : list of str
        One requirement specifier per kept line. Lines that do not start with
        an alphanumeric character (blank lines, ``#`` comments, ``-r`` includes)
        are dropped.
    """
    with open("requirements/" + name + ".in") as f:
        requirements = f.read().strip()

    requirements = requirements.replace("==", ">=").splitlines()  # Loosen strict pins
    # ``pp and ...`` guards against blank lines in the middle of the file, where
    # ``pp[0]`` would otherwise raise IndexError.
    return [pp for pp in requirements if pp and pp[0].isalnum()]
@given(sampled_from(DATA_NAMES) | from_regex("^reg-[A-Z]*") | from_regex("^clf-[A-Z]*"))
def test_get_problem_type(dataset_name):
    """Smoke test: `data.get_problem_type` yields a non-None result for each generated name.

    Names are drawn either from the registered data loaders or from strings
    matching the ``reg-``/``clf-`` prefix patterns.
    """
    inferred_type = data.get_problem_type(dataset_name)
    assert inferred_type is not None
@given(integers(1, 10), integers(1, 10))
def test_get_expected_max_weights(n, m):
    """Smoke test: the weight computation returns something for small ``n`` and ``m``."""
    weights = em.get_expected_max_weights(n, m)
    assert weights is not None


@given(lists(floats()), integers(1, 10))
def test_expected_max(x, m):
    """Smoke test: `expected_max` returns something for arbitrary float lists."""
    result = em.expected_max(x, m)
    assert result is not None


@given(lists(floats()), integers(1, 10))
def test_expected_min(x, m):
    """Smoke test: `expected_min` returns something for arbitrary float lists."""
    result = em.expected_min(x, m)
    assert result is not None
def data_to_concat():
    """Build a hypothesis strategy producing input for `agg.concat_experiments`.

    A single xarray dataset holding every variable is generated with
    `fixed_datasets` and then mapped through `separate`, which cuts it into one
    entry per (test case, method, trial) combination.

    Returns
    -------
    S : hypothesis strategy
        Strategy yielding a list of ``(meta_data, data)`` pairs, where
        ``meta_data = (test_case, method, trial)`` and
        ``data = (perf_ds, time_ds, suggest_ds, sig)``.
    """

    def separate(ds):
        # Enumerate every combination of the three coordinates we split on.
        G = product(
            ds.coords[TEST_CASE].values.tolist(), ds.coords[METHOD].values.tolist(), ds.coords[TRIAL].values.tolist()
        )

        L = []
        for test_case, method, trial in G:
            # Could swap out trial for UUID here
            meta_data = (test_case, method, trial)

            # drop=True removes the selected (now scalar) coordinates from the result
            ds_sub = ds.sel({TEST_CASE: test_case, METHOD: method, TRIAL: trial}, drop=True)

            perf_ds = ds_sub[list(OBJECTIVE_NAMES)]
            time_ds = ds_sub[[SUGGEST_PHASE, EVAL_PHASE, OBS_PHASE]]
            suggest_ds = ds_sub[["foo", "bar", "baz"]]
            sig = ds_sub["sig"].values.tolist()

            data = (perf_ds, time_ds, suggest_ds, sig)
            L.append((meta_data, data))

            # Sanity check the generated data: no NaN may reach the code under test
            assert not any(np.any(np.isnan(perf_ds[kk].values)) for kk in perf_ds)
            assert not any(np.any(np.isnan(time_ds[kk].values)) for kk in time_ds)
            assert not any(np.any(np.isnan(suggest_ds[kk].values)) for kk in suggest_ds)
            assert not np.any(np.isnan(sig))
        return L

    # Dimensions of each variable in the combined dataset
    vars_to_dims = {
        "sig": (SIG_POINT, TEST_CASE, METHOD, TRIAL),
        SUGGEST_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
        EVAL_PHASE: (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL),
        OBS_PHASE: (ITER, TEST_CASE, METHOD, TRIAL),
    }
    dtype = {SUGGEST_PHASE: np.float_, EVAL_PHASE: np.float_, OBS_PHASE: np.float_, "sig": np.float_}
    for obj in OBJECTIVE_NAMES:
        vars_to_dims[obj] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[obj] = np.float_

    # We should also generate this using the space strategy, but hard coding this test case is good enough for now.
    input_vars = {"foo": np.float_, "bar": np.float_, "baz": np.int_}
    for vv, dd in input_vars.items():
        vars_to_dims[vv] = (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
        dtype[vv] = dd

    # Bounded, NaN-free floats so the assertions in `separate` always hold
    float_no_nan = floats(allow_nan=False, min_value=-10, max_value=10)

    # Using on str following dim conventions for coords here
    coords_st = {
        ITER: simple_coords(min_side=1),
        SUGGEST: simple_coords(min_side=1),
        TEST_CASE: xr_coords(elements=xr_dims(), min_side=1),
        METHOD: xr_coords(elements=xr_dims(), min_side=1),
        TRIAL: simple_coords(min_side=1),
        # The signature dimension is pinned to exactly N_SIG points
        SIG_POINT: simple_coords(min_side=N_SIG, max_side=N_SIG),
    }
    S = fixed_datasets(vars_to_dims, dtype=dtype, elements=float_no_nan, coords_st=coords_st, min_side=1).map(separate)
    return S
@given(perf_dataarrays(min_trial=4))
@settings(deadline=None)
def test_compute_aggregates_with_aux(perf_da):
    """Smoke test `compute_aggregates` when a separate visible-performance array is supplied.

    The trials are halved twice: the first half feeds the baseline, and the
    second half is halved again into a visible part and an equally sized
    scored part.
    """
    # First split: carve off the trials used to compute the baseline
    half = perf_da.sizes[TRIAL] // 2
    assert isinstance(half, int)

    baseline_part = perf_da.isel({TRIAL: slice(None, half)})
    assert baseline_part.sizes[TRIAL] >= 1

    remainder = perf_da.isel({TRIAL: slice(half, None)})
    assert remainder.sizes[TRIAL] >= 1
    remainder.coords[TRIAL] = list(range(remainder.sizes[TRIAL]))

    baseline_ds = base.compute_baseline(baseline_part)

    # Second split: visible trials vs. the trials that actually get scored
    half = remainder.sizes[TRIAL] // 2
    assert isinstance(half, int)

    visible_da = remainder.isel({TRIAL: slice(None, half)})
    assert visible_da.sizes[TRIAL] >= 1

    # Upper bound of 2 * half keeps both parts the same length for odd counts
    scored_da = remainder.isel({TRIAL: slice(half, 2 * half)})
    assert scored_da.sizes[TRIAL] >= 1
    scored_da.coords[TRIAL] = list(range(scored_da.sizes[TRIAL]))

    anal.compute_aggregates(scored_da, baseline_ds, visible_perf_da=visible_da)
def test_main():
    """Placeholder test: only checks that the module-level ``EXIST_OK`` flag is truthy."""
    # Really a nop test since there is nothing to test in this func
    exist_ok_flag = dbi.EXIST_OK
    assert exist_ok_flag
def filepaths():
    """Strategy for path strings that start with ``os.sep`` and pass `validate_filepath`."""

    def passes_validation(candidate):
        # Any exception raised by the validator means the candidate is rejected.
        try:
            validate_filepath(candidate)
        except Exception:
            return False
        else:
            return True

    def prepend_sep(tail):
        return os.sep + tail

    allowed_chars = "".join((ascii_letters, digits, "_.-~", os.sep))
    return text(alphabet=allowed_chars, min_size=1).map(prepend_sep).filter(passes_validation)


def filenames(suffix=""):
    """Strategy for file name strings ending in `suffix` that pass `validate_filename`."""

    def passes_validation(candidate):
        # Any exception raised by the validator means the candidate is rejected.
        try:
            validate_filename(candidate)
        except Exception:
            return False
        else:
            return True

    def append_suffix(stem):
        return stem + suffix

    allowed_chars = "".join((ascii_letters, digits, "_.-~"))
    return text(alphabet=allowed_chars, min_size=1).map(append_suffix).filter(passes_validation)


def joinables():
    """Strategy for file names that do not contain `ARG_DELIM`."""

    def lacks_delim(name):
        return ARG_DELIM not in name

    return filenames().filter(lacks_delim)


def datasets():
    """Strategy for data set names: a name from `DATA_NAMES` or a ``reg-``/``clf-`` prefixed name."""
    known_names = sampled_from(DATA_NAMES)
    reg_names = from_regex("^reg-[A-Z]*$")
    clf_names = from_regex("^clf-[A-Z]*$")
    return known_names | reg_names | clf_names
def launcher_args_and_config(min_jobs=0):
    """Strategy for ``(args, opt_file_lookup)`` pairs built from one shared list of optimizer names."""

    def build_pair(opt_names):
        # Both halves of the pair are keyed off the same optimizer names.
        args_strategy = launcher_args(opt_names, min_jobs=min_jobs)
        file_strategy = fixed_dictionaries({name: filenames(suffix=".py") for name in opt_names})
        return tuples(args_strategy, file_strategy)

    # Make opt names a mix of built in opts and arbitrary names
    arbitrary_names = joinables()
    builtin_names = sampled_from(sorted(CONFIG))
    opt_name_lists = lists(arbitrary_names | builtin_names, min_size=1)
    return opt_name_lists.flatmap(build_pair)


def test_is_arg_safe_empty():
    """The empty string must be reported as unsafe, and the answer must be a real `bool`."""
    verdict = launcher._is_arg_safe("")
    assert isinstance(verdict, bool)
    assert not verdict


@given(launcher_args_and_config(), uuids())
@settings(deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def test_gen_commands(args, run_uuid):
    """Smoke test: `gen_commands` can be fully iterated for any safe set of arguments."""
    cmd_args, opt_file_lookup = args

    # Discard examples where a plain string argument is not launcher-safe.
    string_values = [val for val in cmd_args.values() if isinstance(val, str)]
    assume(all(launcher._is_arg_safe(val) for val in string_values))

    # List-valued arguments must be safe element-wise and are de-duplicated in place.
    for key in (CmdArgs.optimizer, CmdArgs.data, CmdArgs.classifier, CmdArgs.metric):
        assume(all(launcher._is_arg_safe(item) for item in cmd_args[key]))
        cmd_args[key] = list(set(cmd_args[key]))

    # Every requested data set needs at least one requested metric valid for its problem type.
    chosen_metrics = set(cmd_args[CmdArgs.metric])
    metrics_by_type = {
        problem_type: sorted(chosen_metrics.intersection(allowed))
        for problem_type, allowed in data.METRICS_LOOKUP.items()
    }
    assume(all(metrics_by_type[data.get_problem_type(name)] for name in cmd_args[CmdArgs.data]))

    commands = list(launcher.gen_commands(cmd_args, opt_file_lookup, run_uuid))
    assert commands is not None
class RandomOptimizer(AbstractOptimizer):
    """Random-search optimizer for tests; with ``flaky=True`` its methods may fail an assert."""

    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, random=np_util.random, flaky=False):
        super().__init__(api_config)
        self.flaky = flaky
        self.random = random

    def _maybe_fail(self):
        # When flaky, make exactly one uniform draw; a draw above 0.5 trips the assert.
        if self.flaky:
            draw = self.random.rand()
            assert draw <= 0.5

    def suggest(self, n_suggestions=1):
        """Return `n_suggestions` guesses from `rs.suggest_dict`, ignoring any history."""
        self._maybe_fail()
        return rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

    def observe(self, X, y):
        """Ignore the observations (random search); may still fail when flaky."""
        self._maybe_fail()


class OutOfBoundsOptimizer(AbstractOptimizer):
    """Optimizer that shifts one real/int parameter of one suggestion past a range endpoint."""

    def __init__(self, api_config, random=np_util.random):
        super().__init__(api_config)
        self.random = random
        # Only parameters typed "real" or "int" are candidates for the shift.
        self.param_list = sorted(name for name, spec in api_config.items() if spec["type"] in ("real", "int"))

    def suggest(self, n_suggestions=1):
        """Return random guesses with one entry moved beyond its configured range."""
        guesses = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

        # Draw order is fixed: which suggestion, which parameter, offset size, direction.
        row = self.random.randint(0, n_suggestions)
        name = self.random.choice(self.param_list)
        spec = self.api_config[name]

        is_real = spec["type"] == "real"
        offset = self.random.rand() if is_real else self.random.randint(1, 10)

        go_below = self.random.rand() <= 0.5
        guesses[row][name] = (spec["range"][0] - offset) if go_below else (spec["range"][1] + offset)
        return guesses

    def observe(self, X, y):
        """Do nothing with the observations."""


class FlakyProblem(TestFunction):
    """Test problem whose `evaluate` fails an assert whenever its uniform draw exceeds 0.5."""

    def __init__(self, api_config, random):
        super().__init__()
        self.random = random
        self.api_config = api_config

    def evaluate(self, params):
        """Return a constant ``[0.0]`` unless the random assert fails first."""
        draw = self.random.rand()
        assert draw <= 0.5
        return [0.0]
@given(
    sampled_from(MODEL_NAMES),
    sampled_from(DATA_LOADER_NAMES),
    sampled_from(METRICS),
    integers(1, 5),
    integers(1, 3),
    seeds(),
)
def test_run_study_bounds_fail(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    """`run_study` must raise the out-of-range error when given `OutOfBoundsOptimizer`."""
    assume(scorer in data.METRICS_LOOKUP[data.get_problem_type(dataset)])

    problem = SklearnModel(model_name, dataset, scorer)
    api_config = problem.get_api_config()
    rng = np.random.RandomState(seed)
    bad_optimizer = OutOfBoundsOptimizer(api_config, random=rng)
    bad_optimizer.get_version()

    # Record the message of whatever is raised; None means nothing was raised at all.
    raised_message = None
    try:
        exp.run_study(bad_optimizer, problem, n_calls, n_suggestions, n_obj=len(problem.objective_names))
    except Exception as err:
        raised_message = str(err)
    assert raised_message == "Optimizer suggestion is out of range."
@given(space_configs(allow_missing=True), integers(0, 5), integers(1, 3), seeds(), seeds())
@settings(deadline=None)
def test_run_study_flaky(api_config, n_calls, n_suggestions, seed1, seed2):
    """Smoke test: `run_study` is run with an optimizer and a problem that each randomly fail an assert."""
    # The strategy yields a 4-tuple; only its first element (the API config) is used here.
    space, _, _, _ = api_config

    problem_rng = np.random.RandomState(seed1)
    flaky_problem = FlakyProblem(api_config=space, random=problem_rng)

    optimizer_rng = np.random.RandomState(seed2)
    flaky_optimizer = RandomOptimizer(space, random=optimizer_rng, flaky=True)
    flaky_optimizer.get_version()

    exp.run_study(flaky_optimizer, flaky_problem, n_calls, n_suggestions)
# NOTE: `np.float64` is used instead of the `np.float_` alias, which was removed in NumPy 2.0;
# both names refer to the same type on older NumPy versions.
@given(gufunc_args("(n,m,k),(k)->()", dtype=[np.float64, str], elements=[floats(), text()], unique=[False, True]))
def test_build_eval_ds(args):
    """Smoke test: `build_eval_ds` accepts an ``(n, m, k)`` float array and ``k`` unique names."""
    function_evals, objective_names = args
    exp.build_eval_ds(function_evals, objective_names)


@given(gufunc_args("(n),(n,m),(n)->()", dtype=np.float64, elements=floats(min_value=0, max_value=1e6)))
def test_build_timing_ds(args):
    """Smoke test: `build_timing_ds` accepts non-negative suggest, eval and observe timings."""
    suggest_time, eval_time, observe_time = args
    exp.build_timing_ds(suggest_time, eval_time, observe_time)
def identity(x):
    """When one needs a default mapping that does nothing."""
    return x


def seeds():
    """Strategy for integers in ``[0, 2**32 - 1]``, used as random seeds."""
    return integers(min_value=0, max_value=(2 ** 32) - 1)


def probs():
    """Strategy for floats in ``[1e-3, 1 - 1e-3]``."""
    return floats(min_value=1e-3, max_value=1 - 1e-3)


def mfloats():
    """Strategy for floats in ``[-1e3, 1e3]``."""
    return floats(min_value=-1e3, max_value=1e3)


def gufunc_floats(signature, min_side=0, max_side=5, unique=False, **kwargs):
    """Strategy for float64 argument tuples whose shapes match a gufunc `signature`.

    Extra keyword arguments are forwarded to the `floats` element strategy.
    """
    elements = floats(**kwargs)
    # np.float64 rather than the np.float_ alias, which was removed in NumPy 2.0.
    S = gufunc_args(signature, dtype=np.float64, elements=elements, unique=unique, min_side=min_side, max_side=max_side)
    return S


def close_enough(x, y, equal_nan=False, rtol=1e-5, atol=1e-8):
    """Return whether `x` and `y` have the same shape and dtype and (nearly) equal values.

    Float arrays are compared with `np.allclose` using `rtol`, `atol` and `equal_nan`;
    every other dtype is compared for exact element-wise equality.
    """
    # Might want to adjust rtol and atol for lower precision floats
    x, y = np.asarray(x), np.asarray(y)

    if x.shape != y.shape or x.dtype != y.dtype:
        return False

    if x.dtype.kind == "f":
        # The dtypes are equal at this point, so `y` is float as well.
        # Note: equal_nan only considered in both float case!
        return np.allclose(x, y, equal_nan=equal_nan, rtol=rtol, atol=atol)
    return np.all(x == y)
Parameters ---------- f : callable This is the original function that handles broadcasting itself. It must return an `ndarray` or multiple `ndarray` (which Python treats as a `tuple`) if returning 2-or-more output arguments. signature : str Signature for shapes to be compatible with. Expects string in format of numpy generalized universal function signature, e.g., `'(m,n),(n)->(m)'` for vectorized matrix-vector multiplication. Officially, only supporting ascii characters. itypes : list-like of dtype List of numpy `dtype` for each argument. These can be either strings (``'int64'``), type (``np.int64``), or numpy `dtype` (``np.dtype('int64')``). A single `dtype` can be supplied for all arguments. otypes : list of dtype The dtype for the outputs of `f`. It must be a list with one dtype for each output argument of `f`. It must be a singleton list if `f` only returns a single output. It can also be set to `None` to leave it to be inferred, but this can create issues with empty arrays, so it is not officially supported here. elements : list-like of strategy Strategies to fill in array elements on a per argument basis. One can also specify a single strategy (e.g., :func:`hypothesis.strategies.floats`) and have it applied to all arguments. unique : list-like of bool Boolean flag to specify if all elements in an array must be unique. One can also specify a single boolean to apply it to all arguments. excluded : list-like of integers Set of integers representing the positional arguments for which the function will not be vectorized. Uses same format as :obj:`numpy.vectorize`. min_side : int or dict Minimum size of any side of the arrays. It is good to test the corner cases of 0 or 1 sized dimensions when applicable, but if not, a min size can be supplied here. Minimums can be provided on a per-dimension basis using a dict, e.g. ``min_side={'n': 2}``. One can use, e.g., ``min_side={hypothesis.extra.gufunc.BCAST_DIM: 2}`` to limit the size of the broadcasted dimensions.
max_side : int or dict Maximum size of any side of the arrays. This can usually be kept small and still find most corner cases in testing. Dictionaries can be supplied as with `min_side`. max_dims_extra : int Maximum number of extra dimensions that can be appended on left of arrays for broadcasting. This should be kept small as the memory used grows exponentially with extra dimensions. Returns ------- f : callable This is the original function that handles broadcasting itself. f_vec : callable Function that should be functionally equivalent to `f` but broadcasting is handled by :obj:`numpy.vectorize`. res : tuple of ndarrays Resulting ndarrays with shapes consistent with `signature`. Extra dimensions for broadcasting will be present. Examples -------- .. code-block:: pycon >>> import numpy as np >>> from hypothesis.strategies import integers, booleans >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'], elements=[integers(0,9), booleans()], unique=[True, False]).example() (<ufunc 'add'>, <numpy.vectorize object at 0x...>, (array([5, 6]), array([ True], dtype=bool))) >>> broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'], elements=[integers(0,9), booleans()], excluded=(1,)).example() (<ufunc 'add'>, <numpy.vectorize object at 0x...>, (array([9]), array(True, dtype=bool))) >>> f, fv, args = broadcasted(np.add, '(),()->()', ['int64'], ['int64', 'bool'], elements=[integers(0,9), booleans()], min_side=1, max_side=3, max_dims_extra=1).example() >>> f is np.add True >>> f(*args) 7 >>> fv(*args) array(7) """ # cache and doc not needed for property testing, excluded not actually # needed here because we don't generate extra dims for the excluded args. # Using the excluded argument in np.vectorize only seems to confuse it in # corner cases.
def multi_broadcast_tester(
    f,
    signature,
    otypes,
    excluded=(),
    dtype=np.float64,  # same type as the np.float_ alias, which NumPy 2.0 removed
    elements=None,
    unique=False,
    map_=identity,
    min_side=0,
    max_side=5,
    max_dims_extra=2,
    **kwargs,
):
    """Check that a multi-output `f` broadcasts the same way as its `np.vectorize` version.

    A hypothesis test is built from `broadcasted` and run immediately. For each generated
    argument tuple, every output of `f` is compared with the matching output of the
    vectorized version: the dtype must match the corresponding entry of `otypes` (only
    the dtype kind for str/bytes outputs) and the values must agree under `close_enough`
    with NaNs treated as equal.

    Parameters
    ----------
    f, signature, otypes, excluded, unique, min_side, max_side, max_dims_extra
        Forwarded to `broadcasted`.
    dtype
        Forwarded to `broadcasted` as `itypes`.
    elements
        Element strategy; when `None`, ``floats(**kwargs)`` is used.
    map_ : callable
        Applied to the generated argument tuple before `f` is called.
    **kwargs
        Passed to `floats` when `elements` is `None`.
    """
    elements = floats(**kwargs) if elements is None else elements

    @given(
        broadcasted(
            f,
            signature,
            otypes=otypes,
            excluded=excluded,
            itypes=dtype,
            elements=elements,
            unique=unique,
            min_side=min_side,
            max_side=max_side,
            max_dims_extra=max_dims_extra,
        )
    )
    def test_f(bargs):
        f0, f_vec, args = bargs

        args = map_(args)

        R1 = f0(*args)
        R2 = f_vec(*args)

        for rr1, rr2, ot in zip(R1, R2, otypes):
            kind = np.dtype(ot).kind
            if kind in "US":  # Same kind ok for str and unicode dtypes
                # Check each individual output: R1 and R2 are tuples of arrays and
                # have no `dtype` attribute themselves.
                assert rr1.dtype.kind == kind
                assert rr2.dtype.kind == kind
            else:
                assert rr1.dtype == ot
                assert rr2.dtype == ot
            assert close_enough(rr1, rr2, equal_nan=True)

    # Call the test
    test_f()
@given(gufunc_floats("(),(),(),()->()", min_value=-1000, max_value=1000))
def test_linear_rescale_bounds(args):
    """The end points of the source interval must map onto the end points of the target interval."""
    a0, b0, a1, b1 = args

    # Use sorted because hypothesis doesn't like using assume too often
    lb0, ub0 = sorted((a0, b0))
    lb1, ub1 = sorted((a1, b1))

    assume(lb0 < ub0)
    assume(lb1 <= ub1)

    # Lower end point first, then upper end point.
    for source_end, target_end in ((lb0, lb1), (ub0, ub1)):
        mapped = np_util.linear_rescale(source_end, lb0, ub0, lb1, ub1)
        assert close_enough(target_end, mapped)
def pair_sort(X, Y):
    """Broadcast `X` and `Y` together and return their element-wise (smaller, larger) pair."""
    broadcast_pair = np.broadcast_arrays(X, Y)
    # Sorting along the stacking axis orders each (X, Y) pair independently.
    lower, upper = np.sort(list(broadcast_pair), axis=0)
    return lower, upper
def test_isclose_2():
    """Check that `np.isclose` on two scalars gives a scalar result.

    This guards against running a numpy version affected by numpy issue
    'inconsistency in np.isclose #7014'.
    """
    scalar_result = np.isclose(0, 1)
    n_dims = np.ndim(scalar_result)
    assert n_dims == 0
    assert np.isscalar(scalar_result)
np.isnan(x_val[last_min, jj]) else: assert c_min[ii, jj] == x_val[last_min, jj] ================================================ FILE: test/quantiles_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import scipy.stats as ss from hypothesis import assume, given from hypothesis.strategies import floats, integers from hypothesis_gufunc.gufunc import gufunc_args as gufunc import bayesmark.quantiles as qt from hypothesis_util import broadcast_tester, gufunc_floats, multi_broadcast_tester # We could use nextafter to get closer to limits, but still creates numerics # issues. 
def order_stats_trim(X):
    """Return `qt.order_stats(X)` with its first and last entries on the last axis removed.

    Along the way this asserts that the result is two longer than `X` on the last axis
    and that the two removed entries are ``-inf`` and ``+inf``.
    """
    padded = qt.order_stats(X)

    # Same leading dimensions as X, last dimension grown by two.
    expected_shape = X.shape[:-1] + (X.shape[-1] + 2,)
    assert padded.shape == expected_shape

    first, last = padded[..., 0], padded[..., -1]
    assert np.all(first == -np.inf)
    assert np.all(last == np.inf)

    trimmed = padded[..., 1:-1]  # Trim out infs
    assert trimmed.shape == tuple(X.shape)
    return trimmed
estimate_np = np.percentile(np.concatenate(([-np.inf], X)), 100 * q, interpolation="higher") assert estimate == estimate_np @given(gufunc("(n),(),()->(),()", dtype=np.float_, elements=[floats_, probs, probs])) def test_quantile_CI(args): X, q, alpha = args idx_q = qt._quantile(len(X), q) idx_l, idx_u = qt._quantile_CI(len(X), q, alpha) assert idx_l <= idx_q assert idx_q <= idx_u # Lot's of checks already inside quantile_CI LB, UB = qt.quantile_CI(X, q, alpha) assert LB <= UB estimate = qt.quantile(X, q) assert LB <= estimate assert estimate <= UB @given(gufunc("(n),(),()->(),()", dtype=np.float_, elements=[floats_, probs, probs])) def test_quantile_CI_monotone_x(args): X, q, alpha = args assume(len(X) >= 1) LB1, UB1 = qt.quantile_CI(X, q, alpha) X2 = np.copy(X) X2[0] = -np.inf LB2, UB2 = qt.quantile_CI(X2, q, alpha) assert LB1 >= LB2 assert UB1 >= UB2 X2 = np.copy(X) X2[0] = np.inf LB2, UB2 = qt.quantile_CI(X2, q, alpha) assert LB1 <= LB2 assert UB1 <= UB2 @given(gufunc("(n),(2),()->(),()", dtype=np.float_, elements=[floats_, probs, probs])) def test_quantile_CI_monotone_q(args): X, q, alpha = args q1, q2 = sorted(q) # Lot's of checks already inside quantile_CI LB1, UB1 = qt.quantile_CI(X, q1, alpha) LB2, UB2 = qt.quantile_CI(X, q2, alpha) assert LB1 <= LB2 assert UB1 <= UB2 @given(gufunc("(n),(),(2)->(),()", dtype=np.float_, elements=[floats_, probs, probs])) def test_quantile_CI_monotone_alpha(args): X, q, alpha = args alpha1, alpha2 = sorted(alpha) # Lot's of checks already inside quantile_CI LB1, UB1 = qt.quantile_CI(X, q, alpha1) # This CI should be larger LB2, UB2 = qt.quantile_CI(X, q, alpha2) assert LB1 <= LB2 assert UB1 >= UB2 @given( gufunc( "(n),(),(),()->()", dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs] ) ) def test_max_quantile_CI(args): X, q, m, alpha = args estimate0, LB0, UB0 = qt.max_quantile_CI(X, q, m, alpha) assert LB0 <= estimate0 assert estimate0 <= UB0 # Recompute without using _ internal funcs q 
= q ** (1.0 / m) LB, UB = qt.quantile_CI(X, q, alpha=alpha) estimate = qt.quantile(X, q) assert estimate0 == estimate assert LB0 == LB assert UB0 == UB @given( gufunc( "(n),(),(),()->()", dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs] ) ) def test_min_quantile_CI(args): X, q, m, alpha = args estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha) assert LB0 <= estimate0 assert estimate0 <= UB0 # Recompute without using _ internal funcs q = 1.0 - (1.0 - q) ** (1.0 / m) LB, UB = qt.quantile_CI(X, q, alpha=alpha) estimate = qt.quantile(X, q) assert estimate0 == estimate assert LB0 == LB assert UB0 == UB @given( gufunc( "(n),(),(),()->()", dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs] ) ) def test_min_quantile_CI_to_max(args): X, q, m, alpha = args epsilon = 1e-8 # Small allowance for numerics estimate0, LB0, UB0 = qt.min_quantile_CI(X, q, m, alpha) # Try just above and below to allow for numerics error in case we are # just on the boundary. 
estimate1, LB1, UB1 = qt.max_quantile_CI(-X, (1.0 - q) - epsilon, m, alpha) estimate2, LB2, UB2 = qt.max_quantile_CI(-X, 1.0 - q, m, alpha) estimate3, LB3, UB3 = qt.max_quantile_CI(-X, (1.0 - q) + epsilon, m, alpha) if len(X) == 0: assert estimate0 == -np.inf # quantile spec rounds down if n=0 else: assert -estimate0 in (estimate1, estimate2, estimate3) assert -LB0 in (UB1, UB2, UB3) assert -UB0 in (LB1, LB2, LB3) @given(gufunc("(n),(),()->()", dtype=np.float_, elements=[floats_, probs, probs])) def test_quantile_and_CI(args): X, q, alpha = args estimate0, LB0, UB0 = qt.quantile_and_CI(X, q, alpha) assert LB0 <= estimate0 assert estimate0 <= UB0 # Recompute without using _ internal funcs LB, UB = qt.quantile_CI(X, q, alpha=alpha) estimate = qt.quantile(X, q) assert estimate0 == estimate assert LB0 == LB assert UB0 == UB def test_order_stats_broadcast(): broadcast_tester(order_stats_trim, "(n)->(n)", otype="float64", dtype=np.float_, elements=floats_) def test_quantile_broadcast_0(): broadcast_tester( qt.quantile, "(n),()->()", otype="float64", excluded=(0,), dtype=np.float_, elements=[floats_, probs] ) def test_quantile_broadcast_1(): broadcast_tester( qt.quantile, "(n),()->()", otype="float64", excluded=(1,), dtype=np.float_, elements=[floats_, probs] ) def test_quantile_CI_broadcast_0(): multi_broadcast_tester( qt.quantile_CI, "(n),(),()->(),()", otypes=["float64", "float64"], excluded=(0,), dtype=np.float_, elements=[floats_, probs, probs], ) def test_quantile_CI_broadcast_1(): multi_broadcast_tester( qt.quantile_CI, "(n),(),()->(),()", excluded=(1, 2), otypes=["float64", "float64"], dtype=np.float_, elements=[floats_, probs, probs], ) def test_max_quantile_CI_broadcast_0(): multi_broadcast_tester( qt.max_quantile_CI, "(n),(),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(0,), dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs], ) def test_max_quantile_CI_broadcast_1(): multi_broadcast_tester( 
qt.max_quantile_CI, "(n),(),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(1, 2, 3), dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs], ) def test_min_quantile_CI_broadcast_0(): multi_broadcast_tester( qt.min_quantile_CI, "(n),(),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(0,), dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs], ) def test_min_quantile_CI_broadcast_1(): multi_broadcast_tester( qt.min_quantile_CI, "(n),(),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(1, 2, 3), dtype=[np.float_, np.float_, np.int_, np.float_], elements=[floats_, probs, counts, probs], ) def test_quantile_and_CI_broadcast_0(): multi_broadcast_tester( qt.quantile_and_CI, "(n),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(0,), dtype=np.float_, elements=[floats_, probs, probs], ) def test_quantile_and_CI_broadcast_1(): multi_broadcast_tester( qt.quantile_and_CI, "(n),(),()->(),(),()", otypes=["float64", "float64", "float64"], excluded=(1, 2), dtype=np.float_, elements=[floats_, probs, probs], ) def mc_test_quantile_CI(mc_runs=1000, n=2000, q=0.5, alpha=0.05, random=np.random): q0 = ss.norm.ppf(q) X = random.randn(mc_runs, n) R = np.array([qt.quantile_CI(xx, q) for xx in X]) LB, UB = R[:, 0], R[:, 1] n_pass = np.sum((LB <= q0) & (q0 <= UB)) # This is only a one-sided test pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha) return pval def mc_test_max_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, random=np.random): qq_level = q ** (1.0 / m) q0 = ss.norm.ppf(qq_level) X = random.randn(mc_runs, n) R = np.array([qt.max_quantile_CI(xx, q, m, alpha) for xx in X]) LB, UB = R[:, 1], R[:, 2] n_pass = np.sum((LB <= q0) & (q0 <= UB)) # This is only a one-sided test pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha) return pval def mc_test_min_quantile_CI(mc_runs=1000, n=2000, q=0.5, m=100, alpha=0.05, 
random=np.random): qq_level = 1.0 - (1.0 - q) ** (1.0 / m) q0 = ss.norm.ppf(qq_level) X = random.randn(mc_runs, n) R = np.array([qt.min_quantile_CI(xx, q, m, alpha) for xx in X]) LB, UB = R[:, 1], R[:, 2] n_pass = np.sum((LB <= q0) & (q0 <= UB)) # This is only a one-sided test pval = ss.binom.cdf(n_pass, mc_runs, 1 - alpha) return pval def test_all_mc(): random = np.random.RandomState(8623) pvals = [] pvals.append(mc_test_quantile_CI(q=0.3, random=random)) pvals.append(mc_test_quantile_CI(q=0.5, random=random)) pvals.append(mc_test_quantile_CI(q=0.99, random=random)) pvals.append(mc_test_max_quantile_CI(q=0.3, random=random)) pvals.append(mc_test_max_quantile_CI(q=0.5, random=random)) pvals.append(mc_test_max_quantile_CI(q=0.99, random=random)) pvals.append(mc_test_min_quantile_CI(q=0.3, random=random)) pvals.append(mc_test_min_quantile_CI(q=0.5, random=random)) pvals.append(mc_test_min_quantile_CI(q=0.99, random=random)) SIDAK_FPR = 1.0 - (1.0 - GLOBAL_FPR) ** (1.0 / len(pvals)) assert np.min(pvals) >= SIDAK_FPR return pvals ================================================ FILE: test/random_search_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np from hypothesis import given, settings from hypothesis.strategies import integers import bayesmark.space as sp from bayesmark.np_util import linear_rescale from bayesmark.random_search import suggest_dict from hypothesis_util import close_enough, gufunc_floats, seeds from util import space_configs @given(space_configs(allow_missing=True), integers(min_value=1, max_value=8), seeds()) @settings(deadline=None) def test_random_search_suggest_sanity(api_args, n_suggest, seed): meta, X, y, _ = api_args # Get the unwarped X S = sp.JointSpace(meta) lower, upper = S.get_bounds().T S.validate(X) N = len(X) # Split history and call twice with diff histories but same seed M = N // 2 X1, X2 = X[:M], X[M:] y1, y2 = y[:M], y[M:] x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed)) x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed)) # Check types too assert len(x_guess) == n_suggest assert all(all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn]) for nn in range(len(x_guess))) assert np.all(x_guess == x_guess2) # Make sure validated S.validate(x_guess) S.validate(x_guess2) # Test sanity of output D, = lower.shape x_guess_w = S.warp(x_guess) assert type(x_guess_w) == np.ndarray assert x_guess_w.dtype.kind == "f" assert x_guess_w.shape == (n_suggest, D) assert x_guess_w.shape == (n_suggest, D) assert np.all(x_guess_w <= upper) @given( gufunc_floats("(n,D),(n)->()", min_value=0.0, max_value=1.0, min_side={"D": 1}), integers(min_value=1, max_value=10), seeds(), ) @settings(deadline=None) def test_random_search_suggest_diff(api_args, n_suggest, seed): # Hard to know how many iters needed for arbitrary space that we need to # run so that we don't get dupes by chance. So, for now, let's just stick # with this simple space. 
dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]} # Use at least 10 n_suggest to make sure don't get same answer by chance X_w, y = api_args D = X_w.shape[1] param_names = ["x%d" % ii for ii in range(5)] meta = dict(zip(param_names, [dim] * D)) # Get the unwarped X S = sp.JointSpace(meta) lower, upper = S.get_bounds().T X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper) X = S.unwarp(X_w) S.validate(X) seed = seed // 2 # Keep in bounds even after add 7 x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed)) # Use diff seed to intentionally get diff result x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7)) # Check types too assert len(x_guess) == n_suggest assert len(x_guess2) == n_suggest assert not np.all(x_guess == x_guess2) # Make sure validated S.validate(x_guess) S.validate(x_guess2) # Test sanity of output D, = lower.shape x_guess_w = S.warp(x_guess) assert type(x_guess_w) == np.ndarray assert x_guess_w.dtype.kind == "f" assert x_guess_w.shape == (n_suggest, D) assert x_guess_w.shape == (n_suggest, D) assert np.all(x_guess_w <= upper) x_guess_w = S.warp(x_guess2) assert type(x_guess_w) == np.ndarray assert x_guess_w.dtype.kind == "f" assert x_guess_w.shape == (n_suggest, D) assert x_guess_w.shape == (n_suggest, D) assert np.all(x_guess_w <= upper) ================================================ FILE: test/serialize_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. import os from string import ascii_letters, digits from hypothesis import given from hypothesis.strategies import lists, text, uuids from pathvalidate.argparse import validate_filename, validate_filepath from bayesmark.serialize import XRSerializer def filepaths(): def valid(ss): try: validate_filepath(ss) except Exception: return False return True alphabet = ascii_letters + digits + "_.-~" + os.sep S = text(alphabet=alphabet, min_size=1).map(lambda ss: os.sep + ss).filter(valid) return S def filenames(suffix=""): def valid(ss): try: validate_filename(ss) except Exception: return False return True alphabet = ascii_letters + digits + "_.-~" S = text(alphabet=alphabet, min_size=1).map(lambda ss: ss + suffix).filter(valid) return S @given(filepaths(), lists(filenames()), filenames()) def test_init_db_manual(db_root, keys, db): XRSerializer.init_db_manual(db_root, keys, db) @given(uuids()) def test_uuid_to_fname(uu): ff = XRSerializer._uuid_to_fname(uu) uu_ = XRSerializer._fname_to_uuid(ff) assert uu == uu_ ff_ = XRSerializer._uuid_to_fname(uu_) assert ff == ff_ @given(filenames()) def test_key_to_fname(key): ff = XRSerializer._key_to_fname(key) kk = XRSerializer._fname_to_key(ff) assert key == kk @given(filepaths(), lists(filenames()), filenames()) def test_validate(db_root, keys, db): XRSerializer._validate(db_root, keys, db) ================================================ FILE: test/signatures_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import random as pyrandom import warnings import numpy as np from hypothesis import given from hypothesis.strategies import dictionaries, floats, lists, text, tuples import bayesmark.signatures as ss from bayesmark.experiment import OBJECTIVE_NAMES from util import space_configs N_SIG = ss.N_SUGGESTIONS def bsigs(): S = lists(floats(allow_infinity=False, allow_nan=False), min_size=N_SIG, max_size=N_SIG) return S def sigs(): S = lists(bsigs(), min_size=1) return S def sig_pair(): def separate(D): signatures, signatures_ref = {}, {} for kk in D: if len(D[kk]) == 1: v_ref, = D[kk] signatures_ref[kk] = np.asarray(v_ref) elif len(D[kk]) == 2: v, v_ref = D[kk] signatures[kk] = np.asarray(v) signatures_ref[kk] = np.asarray(v_ref) else: assert False return signatures, signatures_ref sig_dict = dictionaries(text(), tuples(bsigs()) | tuples(bsigs(), bsigs())) S = sig_dict.map(separate) return S def some_mock_f(x): """Some arbitrary deterministic test function. 
""" random_stream = pyrandom.Random(json.dumps(x, sort_keys=True)) y = [random_stream.gauss(0, 1) for _ in OBJECTIVE_NAMES] return y @given(space_configs()) def test_get_func_signature(api_config): api_config, _, _, _ = api_config signature_x, signature_y = ss.get_func_signature(some_mock_f, api_config) @given(dictionaries(text(), sigs())) def test_analyze_signatures(signatures): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) sig_errs, signatures_median = ss.analyze_signatures(signatures) @given(sig_pair()) def test_analyze_signature_pair(args): signatures, signatures_ref = args with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) sig_errs, signatures_pair = ss.analyze_signature_pair(signatures, signatures_ref) ================================================ FILE: test/sklearn_funcs_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import pickle as pkl import numpy as np from hypothesis import assume, given, settings from hypothesis.strategies import sampled_from, text from sklearn.linear_model import LinearRegression from bayesmark import data from bayesmark import sklearn_funcs as skf from bayesmark.constants import ARG_DELIM, DATA_LOADER_NAMES, METRICS, MODEL_NAMES from bayesmark.random_search import suggest_dict from bayesmark.space import JointSpace from hypothesis_util import seeds @given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds()) @settings(deadline=None) def test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed): prob_type = data.get_problem_type(dataset) assume(metric in data.METRICS_LOOKUP[prob_type]) test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed) api_config = test_prob.get_api_config() x_guess, = suggest_dict([], [], api_config, n_suggestions=1, random=np.random.RandomState(rs_seed)) loss = test_prob.evaluate(x_guess) assert isinstance(loss, tuple) assert all(isinstance(xx, float) for xx in loss) assert np.shape(loss) == np.shape(test_prob.objective_names) @given(text(), text(), text()) def test_inverse_test_case_str(model, dataset, scorer): assume(ARG_DELIM not in (model + dataset + scorer)) test_case = skf.SklearnModel.test_case_str(model, dataset, scorer) R = skf.SklearnModel.inverse_test_case_str(test_case) assert R == (model, dataset, scorer) @given(sampled_from(MODEL_NAMES), sampled_from(DATA_LOADER_NAMES), sampled_from(METRICS), seeds(), seeds()) @settings(deadline=None) def test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed): prob_type = data.get_problem_type(dataset) assume(metric in data.METRICS_LOOKUP[prob_type]) test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=0) api_config = test_prob.get_api_config() space = JointSpace(api_config) n_obj = len(test_prob.objective_names) n_suggestions = 20 x_guess = suggest_dict([], [], api_config, 
n_suggestions=n_suggestions, random=np.random.RandomState(rs_seed)) x_guess_w = space.warp(x_guess) random = np.random.RandomState(model_seed) y = random.randn(n_suggestions, n_obj) reg = LinearRegression() reg.fit(x_guess_w, y) loss0 = reg.predict(x_guess_w) path = pkl.dumps(reg) del reg assert isinstance(path, bytes) test_prob_surr = skf.SklearnSurrogate(model, dataset, metric, path) loss = test_prob_surr.evaluate(x_guess[0]) assert isinstance(loss, tuple) assert all(isinstance(xx, float) for xx in loss) assert np.shape(loss) == np.shape(test_prob.objective_names) assert np.allclose(loss0[0], np.array(loss)) ================================================ FILE: test/space_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np from hypothesis import assume, given from hypothesis.extra.numpy import from_dtype from hypothesis.strategies import booleans, floats, integers, just, lists, sampled_from from hypothesis_gufunc.gufunc import gufunc_args as gufunc from scipy.interpolate import interp1d from sklearn.preprocessing import LabelBinarizer import bayesmark.space as sp from bayesmark.np_util import linear_rescale from bayesmark.space import CAT_DTYPE, CAT_KIND, CAT_NATIVE_DTYPE from hypothesis_util import broadcast_tester, close_enough, gufunc_floats from util import space_configs INT_MIN = np.iinfo("i").min INT_MAX = np.iinfo("i").max WARPS = ("logit", "linear", "bilog", "log") ENCODER_DTYPES = ("bool", "int", "float") def encoder_gen(args): X, labels, assume_sorted, dtype, assume_valid = args if assume_sorted: labels = np.sort(labels) X = labels[X % len(labels)] dtype = dtype.item() # np.array does not like np.array(dtype) return X, labels, assume_sorted, dtype, assume_valid def decoder_gen(args): Y, labels, assume_sorted, dtype, assume_valid = args if assume_sorted: labels = np.sort(labels) dtype = dtype.item() return Y, labels, assume_sorted, dtype, assume_valid def decoder_gen_broadcast(args): Y, labels, assume_sorted = args if assume_sorted: labels = np.sort(labels) return Y, labels, assume_sorted @given( gufunc( "(),(n),(),(),()->(n)", dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_], elements=[ integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans(), ], unique=[False, True, False, False, False], min_side={"n": 1}, ).map(encoder_gen) ) def test_encode_decode(args): X, labels, assume_sorted, dtype, assume_valid = args Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid) if assume_sorted: # otherwise labels will be re-arranged (idx,), = np.where(Y > 0) assert np.asarray(labels[idx]) == X assert Y.dtype == dtype X2 = sp.decode(Y, labels, assume_sorted=assume_sorted) assert 
close_enough(X, X2) @given( gufunc( "(m),(n),(),(),()->(n)", dtype=[np.int_, CAT_DTYPE, np.bool_, str, np.bool_], elements=[ integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans(), ], unique=[False, True, False, False, False], min_side={"m": 1, "n": 3}, ).map(encoder_gen) ) def test_encoder_to_sklearn(args): # sklearn cannot handle this correctly unless n >= 3 X, labels, assume_sorted, dtype, assume_valid = args Y = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid) enc = LabelBinarizer() enc.fit(labels) Y2 = enc.transform(X) assert close_enough(Y, Y2.astype(dtype)) @given( gufunc( "(m,n),(n),(),(),()->(n)", dtype=[np.float_, CAT_DTYPE, np.bool_, str, np.bool_], elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()], unique=[False, True, False, False, False], min_side={"n": 1}, ).map(decoder_gen) ) def test_decode_encode(args): Y, labels, assume_sorted, dtype, assume_valid = args assert Y.ndim >= 1 and Y.shape[-1] == len(labels) X = sp.decode(Y, labels, assume_sorted=assume_sorted) Y2 = sp.encode(X, labels, assume_sorted=assume_sorted, dtype=dtype, assume_valid=assume_valid) # The encoding is defined as the argmax assert np.all(Y.argmax(axis=1) == Y2.argmax(axis=1)) assert np.all(np.sum(Y2 != 0, axis=1) == 1) assert np.all(np.sum(Y2 == 1, axis=1) == 1) @given( gufunc( "(m,n),(n),(),(),()->(n)", dtype=[np.float_, CAT_DTYPE, np.bool_, str, np.bool_], elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans(), sampled_from(ENCODER_DTYPES), booleans()], unique=[False, True, False, False, False], min_side={"m": 1, "n": 3}, ).map(decoder_gen) ) def test_decode_to_sklearn(args): Y, labels, assume_sorted, dtype, assume_valid = args assert Y.ndim >= 1 and Y.shape[-1] == len(labels) X = sp.decode(Y, labels, assume_sorted=assume_sorted) enc = LabelBinarizer() enc.fit(labels) X2 = enc.inverse_transform(Y) assert X.dtype.kind == 
CAT_KIND assert close_enough(X, X2.astype(X.dtype)) def test_encode_broadcast_bool(): broadcast_tester( sp.encode, "(),(n),(),(),()->(n)", otype=bool, excluded=(1, 2, 3, 4), dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_], elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just("bool"), booleans()], unique=[False, True, False, False, False], min_side={"n": 1}, map_=encoder_gen, ) def test_encode_broadcast_int(): broadcast_tester( sp.encode, "(),(n),(),(),()->(n)", otype=int, excluded=(1, 2, 3, 4), dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_], elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just("int"), booleans()], unique=[False, True, False, False, False], min_side={"n": 1}, map_=encoder_gen, ) def test_encode_broadcast_float(): broadcast_tester( sp.encode, "(),(n),(),(),()->(n)", otype=float, excluded=(1, 2, 3, 4), dtype=[np.int_, CAT_DTYPE, np.bool_, object, np.bool_], elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans(), just("float"), booleans()], unique=[False, True, False, False, False], min_side={"n": 1}, map_=encoder_gen, ) def test_decode_broadcast_bool(): broadcast_tester( sp.decode, "(m,n),(n),()->(m)", otype=CAT_DTYPE, excluded=(1, 2), dtype=[np.bool_, CAT_DTYPE, np.bool_], elements=[booleans(), from_dtype(np.dtype(CAT_DTYPE)), booleans()], unique=[False, True, False], min_side={"n": 1}, map_=decoder_gen_broadcast, ) def test_decode_broadcast_int(): broadcast_tester( sp.decode, "(m,n),(n),()->(m)", otype=CAT_DTYPE, excluded=(1, 2), dtype=[np.int_, CAT_DTYPE, np.bool_], elements=[integers(INT_MIN, INT_MAX), from_dtype(np.dtype(CAT_DTYPE)), booleans()], unique=[False, True, False], min_side={"n": 1}, map_=decoder_gen_broadcast, ) def test_decode_broadcast_float(): broadcast_tester( sp.decode, "(m,n),(n),()->(m)", otype=CAT_DTYPE, excluded=(1, 2), dtype=[np.float_, CAT_DTYPE, np.bool_], elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans()], 
unique=[False, True, False], min_side={"n": 1}, map_=decoder_gen_broadcast, ) @given(gufunc("()->()", dtype=np.float_, elements=floats())) def test_bilog_props(args): x, = args y = sp.bilog(x) assert sp.bilog(0) == 0 # This could be its own test assert close_enough(y, -sp.bilog(-x), equal_nan=True) assert np.isfinite(y) == np.isfinite(x) @given(gufunc_floats("(2)->(2)", allow_infinity=False, allow_nan=False)) def test_bilog_monotonic(args): x, = args x1, x2 = sorted(np.abs(x)) assert sp.bilog(x1) < sp.bilog((1 + 1e-6) * x2 + 1e-6) @given(gufunc("()->()", dtype=np.float_, elements=floats())) def test_bilog_biexp(args): x, = args assert close_enough(sp.biexp(sp.bilog(x)), x, equal_nan=True) def test_bilog_broadcast(): broadcast_tester(sp.bilog, "()->()", otype=float) def test_biexp_broadcast(): broadcast_tester(sp.biexp, "()->()", otype=float, min_value=-10, max_value=10) @given(sampled_from(WARPS), gufunc_floats("(n),(m)->(n)", allow_infinity=False, allow_nan=False)) def test_real_values_warp_unwarp(warp, args): x, values = args if warp == "log": values = values[values > 0] if warp == "logit": values = values[(0 < values) & (values < 1)] # We could eliminate need for this if we split out test for log and logit # cases and specify unique flag, but works as is v = np.unique(values) assume(len(v) >= 2) f = interp1d(v, v, kind="nearest", fill_value="extrapolate") x = f(x) assert x.ndim == 1 # make sure interp1d did not mess it up S = sp.Real(warp=warp, values=values) y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) @given(sampled_from(WARPS), gufunc_floats("(n),(2)->(n)", allow_infinity=False, allow_nan=False)) def test_real_range_warp_unwarp(warp, args): x, range_ = args 
if warp == "log": range_ = range_[range_ > 0] if warp == "logit": range_ = range_[(0 < range_) & (range_ < 1)] range_ = np.sort(range_) assume(len(range_) == 2 and range_[0] < range_[1]) x = np.clip(x, range_[0], range_[1]) S = sp.Real(warp=warp, range_=range_) y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) # Note to really stress test this we should elim min and max val, but that # requires that we split out a diff test func for log and logit @given(sampled_from(WARPS), gufunc_floats("(n,1),(2)->(n)", min_value=-1000, max_value=1000)) def test_real_range_unwarp_warp(warp, args): x_w, range_ = args if warp == "log": range_ = range_[range_ > 0] if warp == "logit": range_ = range_[(0 < range_) & (range_ < 1)] range_ = np.sort(range_) assume(len(range_) == 2 and range_[0] < range_[1]) range_warped = sp.WARP_DICT[warp](range_) x_w = np.clip(x_w, range_warped[0], range_warped[1]) S = sp.Real(warp=warp, range_=range_) # Test bounds lower, upper = S.get_bounds().T x_w = linear_rescale(x_w, lb0=-1000, ub0=1000, lb1=lower, ub1=upper) x = S.unwarp(x_w) assert x_w.shape == x.shape + (1,) assert x.dtype == range_.dtype assert x.dtype == S.dtype x2 = S.validate(x) assert close_enough(x, x2) x_w2 = S.warp(x) assert x_w2.shape == x_w.shape x_w3 = S.validate_warped(x_w2) assert close_enough(x_w2, x_w3) assert close_enough(x_w, x_w2) @given( sampled_from(("linear", "bilog")), gufunc("(n),(m)->(n)", dtype=np.int_, elements=integers(INT_MIN, INT_MAX), unique=[False, True], min_side={"m": 2}), ) def test_int_values_warp_unwarp(warp, args): x, values = args v = np.unique(values) # Also sort assert len(v) >= 2 f = interp1d(v, v, kind="nearest", fill_value="extrapolate") x = 
f(x).astype(values.dtype) assert x.ndim == 1 # make sure interp1d did not mess it up S = sp.Integer(warp=warp, values=values) y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) @given(gufunc("(n),(m)->(n)", dtype=np.int_, elements=integers(1, INT_MAX), unique=[False, True], min_side={"m": 2})) def test_log_int_values_warp_unwarp(args): x, values = args warp = "log" v = np.unique(values) # Also sort assert len(v) >= 2 f = interp1d(v, v, kind="nearest", fill_value="extrapolate") x = f(x).astype(values.dtype) assert x.ndim == 1 # make sure interp1d did not mess it up S = sp.Integer(warp=warp, values=values) y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) @given(sampled_from(("linear", "bilog", "log")), gufunc("(n),(2)->(n)", dtype=np.int_, elements=integers(-1000, 1000))) def test_int_range_warp_unwarp(warp, args): """Warning: this explicitly ignores issues with min max if going to int limit, since >>> np.array(INT_MAX).astype(np.float32).astype(np.int32) array(-2147483648, dtype=int32) Without any warning from numpy. """ x, range_ = args # We could split out log into diff function without this pruning if we # start failing hypothesis health check. 
if warp == "log": range_ = range_[range_ > 0] range_ = np.sort(range_) assume(len(range_) == 2 and range_[0] < range_[1]) x = np.clip(x, range_[0], range_[1]).astype(range_.dtype) S = sp.Integer(warp=warp, range_=range_) y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert x.dtype == x2.dtype # Close enough when evaluated as floats assert close_enough(x.astype("f"), x2.astype("f")) @given(gufunc("(n)->(n)", dtype=np.bool_, elements=booleans())) def test_bool_warp_unwarp(args): x, = args S = sp.Boolean() y = S.warp(x) assert y.shape == x.shape + (1,) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) @given( gufunc( "(n),(m)->(n)", dtype=[np.int_, CAT_DTYPE], elements=[integers(0, INT_MAX), from_dtype(np.dtype(CAT_DTYPE))], unique=[False, True], min_side={"m": 2}, ) ) def test_cat_warp_unwarp(args): x, values = args assert len(set(values)) >= 2 x = values[x % len(values)] assert x.ndim == 1 S = sp.Categorical(values=values) y = S.warp(x) assert y.shape == x.shape + (len(values),) assert y.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= y) assert np.all(y <= upper) y2 = S.validate_warped(y) assert close_enough(y, y2) x2 = S.unwarp(y) assert x2.shape == x.shape x3 = S.validate(x2) assert close_enough(x2, x3) assert close_enough(x, x2) @given(space_configs()) def test_joint_space_unwarp_warp(args): meta, X, _, _ = args S = sp.JointSpace(meta) S.validate(X) X_w2 = S.warp(X) assert X_w2.dtype == 
sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= X_w2) assert np.all(X_w2 <= upper) X2 = S.unwarp(X_w2) assert all(all(close_enough(X[ii][vv], X2[ii][vv]) for vv in X[ii]) for ii in range(len(X))) S.validate(X2) @given(space_configs()) def test_joint_space_warp_missing(args): meta, X, _, fixed_vars = args S = sp.JointSpace(meta) X_w = S.warp([fixed_vars]) assert X_w.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all((lower <= X_w) | np.isnan(X_w)) assert np.all((X_w <= upper) | np.isnan(X_w)) for param, xx in zip(S.param_list, np.hsplit(X_w, S.blocks[:-1])): xx, = xx if param in fixed_vars: x_orig = S.spaces[param].unwarp(xx).item() S.spaces[param].validate(x_orig) assert close_enough(x_orig, fixed_vars[param]) # check other direction x_w2 = S.spaces[param].warp(fixed_vars[param]) assert close_enough(xx, x_w2) else: assert np.all(np.isnan(xx)) @given(space_configs()) def test_joint_space_warp_fixed_vars(args): meta, X, _, fixed_vars = args # set X vals equal to fixed_vars for xx in X: for param in fixed_vars: xx[param] = fixed_vars[param] S = sp.JointSpace(meta) lower, upper = S.get_bounds().T X_w = S.warp(X) assert X_w.dtype == sp.WARPED_DTYPE # Test bounds lower, upper = S.get_bounds().T assert np.all(lower <= X_w) assert np.all(X_w <= upper) X2 = S.unwarp(X_w, fixed_vals=fixed_vars) # Make sure we get == not just close in unwarp for fixed vars for xx in X2: for param in fixed_vars: assert xx[param] == fixed_vars[param] @given(space_configs(), integers(min_value=0, max_value=10)) def test_joint_grid(args, max_interp): meta, _, _, _ = args type_whitelist = (bool, int, float, CAT_NATIVE_DTYPE) S = sp.JointSpace(meta) lower, upper = S.get_bounds().T G = S.grid(max_interp=max_interp) assert sorted(G.keys()) == sorted(meta.keys()) for var, grid in G.items(): curr_space = S.spaces[var] # Make sure same as calling direct grid2 = curr_space.grid(max_interp) assert grid == grid2 if len(grid) == 0: 
assert grid == [] assert max_interp == 0 if curr_space.values is None else len(curr_space.values) == 0 continue # Make sure native type assert all(type(xx) in type_whitelist for xx in grid) tt = type(grid[0]) assert all(type(xx) == tt for xx in grid) assert np.all(np.array(grid) == np.unique(grid)) if max_interp >= 2: assert curr_space.lower is None or close_enough(curr_space.lower, grid[0]) assert curr_space.upper is None or close_enough(curr_space.upper, grid[-1]) if curr_space.values is not None: assert np.all(curr_space.values == grid) else: assert len(grid) <= max_interp # Could else, check approx linear in warped space, but good enough # for now. def test_unravel_index_empty(): assert sp.unravel_index(()) == () @given(lists(integers(0, 2), min_size=1)) def test_unravel_index_empty_2(dims): if np.prod(dims) > 0: dims[0] = 0 dims = tuple(dims) assert sp.unravel_index(dims) == () ================================================ FILE: test/stats_test.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import numpy as np import scipy.stats as sst from hypothesis import assume, given from hypothesis.strategies import integers, lists, sampled_from from hypothesis_gufunc.gufunc import gufunc_args from sklearn.preprocessing import robust_scale from bayesmark import stats from hypothesis_util import close_enough, mfloats, probs, seeds def t_test_(x): """Perform a standard t-test to test if the values in `x` are sampled from a distribution with a zero mean. Parameters ---------- x : array-like, shape (n_samples,) array of data points to test. Returns ------- pval : float p-value (in [0,1]) from t-test on `x`. """ assert np.ndim(x) == 1 and (not np.any(np.isnan(x))) if (len(x) <= 1) or (not np.all(np.isfinite(x))): return 1.0 # Can't say anything about scale => p=1 _, pval = sst.ttest_1samp(x, 0.0) if np.isnan(pval): # Should only be possible if scale underflowed to zero: assert np.var(x, ddof=1) <= 1e-100 # It is debatable if the condition should be ``np.mean(x) == 0.0`` or # ``np.all(x == 0.0)``. Should not matter in practice. pval = np.float(np.mean(x) == 0.0) assert 0.0 <= pval and pval <= 1.0 return pval @given(gufunc_args("(n),()->(n)", dtype=np.float_, elements=[mfloats(), probs()], min_side=2)) def test_robust_standardize_to_sklearn(args): X, q_level = args q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level) assert close_enough(q1 - q0, q_level) X_bo = stats.robust_standardize(X, q_level=q_level) X = X[:, None] X_skl = robust_scale(X, axis=0, with_centering=True, with_scaling=True, quantile_range=[100.0 * q0, 100.0 * q1]) X_skl = X_skl[:, 0] * (sst.norm.ppf(q1) - sst.norm.ppf(q0)) assert close_enough(X_bo, X_skl, equal_nan=True) def test_robust_standardize_broadcast(): """Need to do things different here since standardize broadcasts over the wrong dimension (0 instead of -1). """ # Build vectorize version, this is just loop inside. 
f_vec = np.vectorize(stats.robust_standardize, signature="(n),()->(n)", otypes=["float64"]) @given(gufunc_args("(n,m),()->(n,m)", dtype=np.float_, min_side={"n": 2}, elements=[mfloats(), probs()])) def test_f(args): X, q_level = args R1 = stats.robust_standardize(X, q_level) R2 = f_vec(X.T, q_level).T assert R1.dtype == "float64" assert R2.dtype == "float64" assert close_enough(R1, R2, equal_nan=True) # Call the test test_f() @given(integers(0, 10), mfloats(), probs()) def test_t_EB_zero_var(N, val, alpha): x = val + np.zeros(N) EB = stats.t_EB(x, alpha=alpha) if N <= 1: assert EB == np.inf else: assert np.allclose(EB, 0.0) @given(integers(1, 10), sampled_from([np.inf, -np.inf]), probs()) def test_t_EB_inf(N, val, alpha): x = np.zeros(N) x[0] = val EB = stats.t_EB(x, alpha=alpha) if N <= 1: assert EB == np.inf else: assert np.isnan(EB) @given(seeds(), probs(), integers(2, 10)) def test_t_EB_coverage(seed, alpha, N): trials = 100 random_st = np.random.RandomState(seed) fail = 0 for tt in range(trials): x = random_st.randn(N) EB = stats.t_EB(x, alpha=alpha) mu = np.nanmean(x) LB, UB = mu - EB, mu + EB assert np.isfinite(LB) and np.isfinite(UB) fail += (0.0 < LB) or (UB < 0.0) pval = sst.binom_test(fail, trials, alpha) assert pval >= 0.05 / 100 # Assume we run 100 times @given(lists(mfloats(), min_size=2)) def test_t_test_to_EB(x): pval = t_test_(x) assume(0.0 < pval and pval < 1.0) EB = stats.t_EB(x, alpha=pval) assert np.allclose(np.abs(np.mean(x)), EB) ================================================ FILE: test/util.py ================================================ # Copyright (c) 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np from hypothesis import assume from hypothesis.extra.numpy import arrays from hypothesis.strategies import ( binary, booleans, composite, dictionaries, floats, from_regex, frozensets, integers, lists, sampled_from, text, ) from hypothesis_gufunc.extra.xr import fixed_dataarrays, simple_coords, xr_coords import bayesmark.space as sp from bayesmark.constants import ARG_DELIM, ITER, METHOD, RANDOM_SEARCH, SUGGEST, TEST_CASE, TRIAL from bayesmark.np_util import linear_rescale NULL_PLUG = "\x00" def _easy_text(): # The NULL_PLUG confuses numpy arrays, so assume that is not in S = text().filter(lambda ss: NULL_PLUG not in ss) return S def _hashable(): S = floats() | integers() | _easy_text() return S CAT_STGY = _easy_text if sp.CAT_KIND == "U" else binary F_MIN = np.nextafter(0, 1) RANGES = {"linear": (-1000, 1000), "log": (F_MIN, 1000), "logit": (F_MIN, np.nextafter(1, 0)), "bilog": (-100, 100)} SPACES = tuple(sorted(sp.SPACE_DICT.keys())) @composite def space_vars(draw, max_values=5): """Build composite strategy for random API calls.""" type_ = draw(sampled_from(SPACES)) use_values = draw(booleans()) if type_ == "real": warp = draw(sampled_from(("linear", "log", "logit", "bilog"))) min_val, max_val = RANGES[warp] if use_values: # Generating unique values to ensure that always have more than 2 # unique values, but code is designed to accept non-unique values # arrays as long as more than 2 non-unique. Could generalize this. 
values = draw(lists(floats(min_val, max_val), min_size=2, max_size=max_values, unique=True)) D = {"type": type_, "space": warp, "values": values} else: range_ = tuple(sorted(draw(lists(floats(min_val, max_val), min_size=2, max_size=2, unique=True)))) D = {"type": type_, "space": warp, "range": range_} elif type_ == "int": warp = draw(sampled_from(("linear", "log", "bilog"))) min_val, max_val = RANGES[warp] # Must shrink these to next integers in range to keep hypothesis happy min_val = int(np.ceil(min_val)) max_val = int(np.floor(max_val)) if use_values: values = draw(lists(integers(min_val, max_val), min_size=2, max_size=max_values, unique=True)) D = {"type": type_, "space": warp, "values": values} else: range_ = tuple(sorted(draw(lists(integers(min_val, max_val), min_size=2, max_size=2, unique=True)))) D = {"type": type_, "space": warp, "range": range_} elif type_ == "bool": D = {"type": type_} elif type_ == "cat" or type_ == "ordinal": values = draw(lists(CAT_STGY(), min_size=2, max_size=max_values, unique=True)) # This assume is needed because np.unique has bug for null plug # .. >>> np.unique([u'', u'\x00']) # .. 
array([u''], dtype=' 0) assume(len(da_seq[0].dims) > 0) da = da_seq[0] kk = da.dims[0] da_seq[0] = da.assign_coords(**{kk: range(da.sizes[kk])}) xru.coord_compat(da_seq, common_dims) @given(dataarrays(min_dims=1, max_dims=1)) def test_da_to_string(da): xru.da_to_string(da) @given(dataarrays(min_side=0, min_dims=0), integers(1, 3)) @settings(deadline=None) def test_da_concat(da, n): assume(n < len(da.dims)) da_dict, keys_to_slice = da_split(da, n) assume(len(da_dict) > 0) assert len(keys_to_slice) == n xru.da_concat(da_dict, dims=keys_to_slice) def da_split(da, n): assert 0 < n assert n <= len(da.dims) keys_to_slice = da.dims[-n:] da_dict = {} vals = [da.coords[kk].values.tolist() for kk in keys_to_slice] for vv in product(*vals): lookup = dict(zip(keys_to_slice, vv)) da_dict[tuple(vv)] = da.sel(lookup, drop=True) return da_dict, keys_to_slice @given(datasets(min_side=1, min_dims=1), integers(1, 3)) @settings(deadline=None) def test_ds_concat(ds, n): all_dims = [ds[kk].dims for kk in ds] common_dims = sorted(intersect_seq(all_dims)) n = min([n, len(common_dims) - 1]) assume(0 < n) keys_to_slice = common_dims[:n] ds_dict = {} vals = [ds.coords[kk].values.tolist() for kk in keys_to_slice] for vv in product(*vals): lookup = dict(zip(keys_to_slice, vv)) ds_dict[vv] = ds.sel(lookup, drop=True) xru.ds_concat(ds_dict, dims=keys_to_slice) ================================================ FILE: test.sh ================================================ #!/bin/bash set -ex set -o pipefail # Set conda paths export CONDA_PATH=./tmp/conda export CONDA_ENVS=env # Sometime pip PIP_REQUIRE_VIRTUALENV has issues with conda export PIP_REQUIRE_VIRTUALENV=false PY_VERSIONS=( "3.6" "3.7" ) # Handy to know what we are working with git --version # Cleanup workspace, src for any old -e installs git clean -x -f -d rm -rf src/ # Install miniconda if command -v conda 2>/dev/null; then echo "Conda already installed" else # We need to use miniconda since we can't figure out ho to install py3.6 in 
# this env image. We could also use Miniconda3-latest-Linux-x86_64.sh but # pinning version to make reproducible. echo "Installing miniconda" if [[ "$OSTYPE" == "darwin"* ]]; then # In future let's also try, for reproducibility: # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-MacOSX-x86_64.sh; curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh; else # In future let's also try, for reproducibility: # curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh; curl -L -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh; fi chmod +x ./miniconda.sh ./miniconda.sh -b -p $CONDA_PATH rm ./miniconda.sh fi export PATH=$CONDA_PATH/bin:$PATH # Setup env just for installing pre-commit to run hooks on all files rm -rf "$CONDA_ENVS" ENV_PATH="${CONDA_ENVS}/bobm_commit_hooks" conda create -y -q -p $ENV_PATH python=3.6 echo $ENV_PATH source activate $ENV_PATH python --version pip freeze | sort # not listing 2nd order deps here, but probably ok pip install -r requirements/tools.txt # Now run hooks on all files, don't need to install hooks since run directly pre-commit run --all-files # Now can leave env with pre-commit conda deactivate # Also check no changes to files by hooks test -z "$(git diff)" # clean up for good measure, but need to keep miniconda tmp folder git clean -x -f -d --exclude=tmp # Tool to compare only the package names in pip file # On mac, sed -r needs to be sed -E nameonly () { grep -i '^[a-z0-9]' | sed -E "s/([^=]*)==.*/\1/g" | tr _ - | sort -f; } nameveronly () { grep -i '^[a-z0-9]' | awk '{print $1}' | tr _ - | sort -f; } pipcheck () { cat $@ | grep -i '^[a-z0-9]' | awk '{print $1}' | sed -f requirements/pipreqs_edits.sed | sort -f | uniq >ask.log && pip freeze | sed -f requirements/pipreqs_edits.sed | sort -f >got.log && diff -i ask.log got.log; } # Now test the deps ENV_PATH="${CONDA_ENVS}/deps_test" conda
create -y -q -p $ENV_PATH python=3.6 echo $ENV_PATH source activate $ENV_PATH python --version pip freeze | sort # Install all requirements, make sure they are mutually compatible pip install -r requirements/base.txt pipcheck requirements/base.txt # Install package python setup.py install pipcheck requirements/base.txt requirements/self.txt pip install -r requirements/optimizers.txt pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt pip install -r requirements/test.txt pipcheck requirements/base.txt requirements/self.txt requirements/optimizers.txt requirements/test.txt pip install -r requirements/ipynb.txt pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt pip install -r requirements/docs.txt pipcheck requirements/base.txt requirements/self.txt requirements/test.txt requirements/optimizers.txt requirements/ipynb.txt requirements/docs.txt pip install -r requirements/tools.txt # Make sure .in file corresponds to what is imported nameonly ask.log pipreqs bayesmark/ --ignore bayesmark/builtin_opt/ --savepath requirement_chk.in sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log diff ask.log got.log nameonly ask.log pipreqs test/ --savepath requirement_chk.in sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log diff ask.log got.log nameonly ask.log pipreqs bayesmark/builtin_opt/ --savepath requirement_chk.in sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log diff ask.log got.log nameonly ask.log pipreqs docs/ --savepath requirement_chk.in sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log diff ask.log got.log nameonly ask.log jupyter nbconvert --to script notebooks/*.ipynb pipreqs notebooks/ --savepath requirement_chk.in sed -f requirements/pipreqs_edits.sed requirement_chk.in | nameonly >got.log diff ask.log got.log # Make sure txt file corresponds to pip compile # 
First copy the originals for f in requirements/*.txt; do cp -- "$f" "${f%.txt}.chk"; done # Now re-compile # no-upgrade means that by default it keeps the 2nd order dependency versions already in the requirements txt file # (otherwise it brings it to the very latest available version which often causes issues). pip-compile-multi -o txt --no-upgrade nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/base.txt | nameveronly >got.log diff ask.log got.log nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/test.txt | nameveronly >got.log diff ask.log got.log nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/optimizers.txt | nameveronly >got.log diff ask.log got.log nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/ipynb.txt | nameveronly >got.log diff ask.log got.log nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/docs.txt | nameveronly >got.log diff ask.log got.log nameveronly ask.log sed -f requirements/pipreqs_edits.sed requirements/tools.txt | nameveronly >got.log diff ask.log got.log # Deactivate virtual environment conda deactivate # Set up environments for all Python versions and loop over them rm -rf "$CONDA_ENVS" for i in "${PY_VERSIONS[@]}" do # Now test the deps ENV_PATH="${CONDA_ENVS}/unit_test" conda create -y -q -p $ENV_PATH python=$i echo $ENV_PATH source activate $ENV_PATH python --version pip freeze | sort # Install all requirements pip install -r requirements/test.txt # Install package python setup.py install # Run tests pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings --cov=bayesmark --cov-report html conda deactivate done ================================================ FILE: tools/archive_branch.sh ================================================ #!/bin/bash set -ex set -o pipefail DATE=$(date +"%Y%m%d") TAGNAME=archive/$DATE-$1 # Fail if untracked files test -z "$(git status --porcelain)" # Fail if origin and local differ git diff 
$1 origin/$1 --quiet # Prune remotes for good measure git remote prune origin git checkout $1 git tag -a $TAGNAME -m "archived branch $1 on $DATE" git checkout master git push origin $TAGNAME # Make sure we tagged correctly for good measure diff <(git rev-list $TAGNAME -n 1) <(git rev-parse $1) git ls-remote --tags origin | grep $(git rev-parse $1) git branch -D $1 git push origin --delete $1 echo "cleaned up" ================================================ FILE: tools/deploy.sh ================================================ #!/bin/bash # # Note that # UUID=$(uuidgen) # works on Mac OS by default, but requires installation on linux. set -ex set -o pipefail # Script arguments REMOTE=$1 BRANCH=$2 PACKAGE=$3 VERSION=$4 # Check to make sure we have keys setup right before we start git push --dry-run # Check versions are there, this is a crude way to do it but it works grep "^$PACKAGE==$VERSION\$" requirements/self.txt grep '^__version__ = "'$VERSION'"$' bayesmark/__init__.py grep 'version="'$VERSION'",$' setup.py # Where envs go ENVS=~/envs # Which python version this uses PY=python3.7 # Which env contains twine and py version we use TWINE_ENV=twine_env # Where to run tar ball tests from TEST_DIR=~/tmp/deploy_tests mkdir -p $TEST_DIR # Get the dir REPO_DIR=$(pwd) git checkout $BRANCH # Fail if untracked files and clean test -z "$(git status --porcelain)" git clean -x -ff -d # Run tests locally and cleanup ./integration_test_with_setup.sh ./test.sh git reset --hard HEAD git clean -x -ff -d test -z "$(git status --porcelain)" # push to remote and check git push -u $REMOTE $BRANCH git diff $BRANCH $REMOTE/$BRANCH --quiet # See if tests pass remote, TODO use travis CLI read -t 1 -n 10000 discard || true read -p "Travis tests pass [y/n]? " -r if [[ ! 
$REPLY =~ ^[Yy]$ ]] then exit 1 fi # test tar ball source $ENVS/$TWINE_ENV/bin/activate ./build_wheel.sh twine check dist/* deactivate cd $TEST_DIR UUID=$(uuidgen) mkdir $UUID cd $UUID virtualenv env --python=$PY source ./env/bin/activate pip install -r $REPO_DIR/requirements/test.txt pip install $REPO_DIR/dist/*.tar.gz cp -r $REPO_DIR/test . pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings deactivate cd $REPO_DIR # Cleanup since we will build again git clean -x -ff -d test -z "$(git status --porcelain)" # merge master # Fail if origin and local differ git checkout $BRANCH git diff master $REMOTE/master --quiet git merge master --no-commit # Fail if not clean test -z "$(git status --porcelain)" # merge to master git checkout master git merge $BRANCH --squash --no-commit git status read -t 1 -n 10000 discard || true read -p "Commit message (CTRL-C to abort): " git commit -m "$REPLY" # Fail if not clean test -z "$(git status --porcelain)" # Run tests locally and cleanup ./integration_test_with_setup.sh ./test.sh git reset --hard HEAD git clean -x -ff -d test -z "$(git status --porcelain)" # test tar ball source $ENVS/$TWINE_ENV/bin/activate ./build_wheel.sh twine check dist/* deactivate cd $TEST_DIR UUID=$(uuidgen) mkdir $UUID cd $UUID virtualenv env --python=$PY source ./env/bin/activate pip install -r $REPO_DIR/requirements/test.txt pip install $REPO_DIR/dist/*.tar.gz cp -r $REPO_DIR/test . pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings deactivate cd $REPO_DIR # push to test pypi source $ENVS/$TWINE_ENV/bin/activate twine upload --repository-url https://test.pypi.org/legacy/ dist/* deactivate echo "ready to run?" 
echo "pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/" read -p "Enter when pypi has updated: " -r # install and test cd $TEST_DIR UUID=$(uuidgen) mkdir $UUID cd $UUID virtualenv env --python=$PY source ./env/bin/activate pip install -r $REPO_DIR/requirements/test.txt pip install -r $REPO_DIR/requirements/ipynb.txt pip install $PACKAGE==$VERSION --index-url https://test.pypi.org/simple/ cp $REPO_DIR/integration_test.sh . cp -r $REPO_DIR/notebooks . cp -r $REPO_DIR/example_opt_root . ./integration_test.sh cp -r $REPO_DIR/test . pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings deactivate cd $REPO_DIR # push to remote and check git push $REMOTE master git diff master $REMOTE/master --quiet # Show sha256sum in case we want to check against PyPI test, use || for Mac OS version sha256sum dist/* || shasum -a 256 dist/* # See if tests pass remote, TODO use travis CLI read -t 1 -n 10000 discard || true read -p "Travis tests pass, and push to PyPI? This cannot be undone. [push/no]" -r if [[ ! $REPLY == push ]] then exit 1 fi # push to full pypi source $ENVS/$TWINE_ENV/bin/activate twine upload dist/* deactivate echo "ready to run?" echo "pip install $PACKAGE==$VERSION" read -p "Enter when pypi has updated: " -r # install and test cd $TEST_DIR UUID=$(uuidgen) mkdir $UUID cd $UUID virtualenv env --python=$PY source ./env/bin/activate pip install -r $REPO_DIR/requirements/test.txt pip install -r $REPO_DIR/requirements/ipynb.txt pip install $PACKAGE==$VERSION cp $REPO_DIR/integration_test.sh . cp -r $REPO_DIR/notebooks . cp -r $REPO_DIR/example_opt_root . ./integration_test.sh cp -r $REPO_DIR/test . 
pytest test/ -s -v --hypothesis-seed=0 --disable-pytest-warnings deactivate cd $REPO_DIR # clean and tag git clean -x -ff -d test -z "$(git status --porcelain)" git tag -a v$VERSION -m "$PACKAGE version $VERSION" git push $REMOTE v$VERSION # remind user to archive/delete branch echo "remember to delete branch $BRANCH, and update readthedocs.io" echo "done"