[
  {
    "path": ".gitignore",
    "content": "*.DS_Store\n*.sw*\n*.pyc\n*.Rapp.history\n*.*~\n*.out\n*hide\n*hide_*\n*save\n*save_*\n*saved\n*saved_*\n*pylintrc*\nnasbench_only108.tfrecord\n.ipynb_checkpoints/*\n\n*aux.pkl\n*config.pkl\n*nextpt.pkl\n*data.pkl\n*log.txt\n\n"
  },
  {
    "path": "LICENSE",
    "content": "   Copyright (c) 2019, naszilla.\n   All rights reserved.\n\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License."
  },
  {
    "path": "README.md",
    "content": "# BANANAS\n\n**Note: our naszilla/bananas repo has been extended and renamed to [naszilla/naszilla](https://github.com/naszilla/naszilla), and this repo is deprecated and not maintained. Please use [naszilla/naszilla](https://github.com/naszilla/naszilla), which has more functionality.**\n\n[BANANAS: Bayesian Optimization with Neural Architectures for Neural Architecture Search](https://arxiv.org/abs/1910.11858)\\\nColin White, Willie Neiswanger, and Yash Savani.\\\n_arXiv:1910.11858_.\n\n## A new method for neural architecture search\nBANANAS is a neural architecture search (NAS) algorithm which uses Bayesian optimization with a meta neural network to predict the validation accuracy of neural architectures. We use a path-based encoding scheme to featurize the neural architectures that are used to train the neural network model. After training on just 200 architectures, we are able to predict the validation accuracy of new architectures to within one percent on average. The full NAS algorithm beats the state of the art on the NASBench and the DARTS search spaces. On the NASBench search space, BANANAS is over 100x more efficient than random search, and 3.8x more efficent than the next-best algorithm we tried. On the DARTS search space, BANANAS finds an architecture with a test error of 2.57%.\n\n<p align=\"center\">\n<img src=\"img/bananas_fig.png\" alt=\"bananas_fig\" width=\"70%\">\n</p>\n\n## Requirements\n- jupyter\n- tensorflow == 1.14.0 (used for all experiments)\n- nasbench (follow the installation instructions [here](https://github.com/google-research/nasbench))\n- nas-bench-201 (follow the installation instructions [here](https://github.com/D-X-Y/NAS-Bench-201))\n- pytorch == 1.2.0, torchvision == 0.4.0 (used for experiments on the DARTS search space)\n- pybnn (used only for the DNGO baselien algorithm. Installation instructions [here](https://github.com/automl/pybnn))\n\nIf you run experiments on DARTS, you will need our fork of the darts repo:\n- Download the repo: https://github.com/naszilla/darts\n- If the repo is not in your home directory, i.e., `~/darts`, then update line 5 of `bananas/darts/arch.py` and line 8 of `bananas/train_arch_runner.py` with the correct path to this repo\n\n\n## Train a meta neural network with a notebook on the NASBench dataset\n- Download the nasbench_only108 tfrecord file (size 499MB) [here](https://storage.googleapis.com/nasbench/nasbench_only108.tfrecord)\n- Place `nasbench_only108.tfrecord` in the top level folder of this repo\n- Open and run `meta_neuralnet.ipynb` to reproduce Table 1 and Figure A.1 of our paper\n\n<p align=\"center\">\n  <img src=\"img/metann_adj_train.png\" alt=\"bananas_fig\" width=\"24%\">\n  <img src=\"img/metann_adj_test.png\" alt=\"bananas_fig\" width=\"24%\">\n  <img src=\"img/metann_path_train.png\" alt=\"bananas_fig\" width=\"24%\">\n  <img src=\"img/metann_path_test.png\" alt=\"bananas_fig\" width=\"24%\">\n</p>\n\n## Evaluate pretrained BANANAS architecture\nThe best architecture found by BANANAS on the DARTS search space achieved 2.57% test error. To evaluate our pretrained neural architecture, download the weights [bananas.pt](https://drive.google.com/file/d/1d8jnI0R9fvXBjkIY7CRogyxynEh6TWu_/view?usp=sharing) and put it inside the folder `<path-to-darts>/cnn`\n\n```bash\ncd <path-to-darts>/cnn; python test.py --model_path bananas.pt\n```\n\nThe error on the test set should be 2.57%. 
This can be run on a CPU or GPU, but it will be faster on a GPU.\n\n<p align=\"center\">\n<img src=\"img/bananas_normal.png\" alt=\"bananas_normal\" width=\"42%\">\n<img src=\"img/bananas_reduction.png\" alt=\"bananas_reduction\" width=\"47%\">\n</p>\n<p align=\"center\">\nThe best neural architecture found by BANANAS on CIFAR-10. Convolutional cell (left), and reduction cell (right).\n</p>\n\n## Train BANANAS architecture\nTrain the best architecture found by BANANAS.\n\n```bash\ncd <path-to-darts>/cnn; python train.py --auxiliary --cutout\n```\n\nThis will train the architecture from scratch, which takes about 34 hours on an NVIDIA V100 GPU. \nThe final test error should be 2.59%.\nSetting the random seed to 4 by adding `--seed 4` will result in a test error of 2.57%.\nWe report the random seeds and hardware used in Table 2 of our paper [here](https://docs.google.com/spreadsheets/d/1z6bHUgX8r0y9Bh9Zxot_B9nT_9qLWJoD0Um0fTYdpus/edit?usp=sharing).\n\n## Run BANANAS on the NASBench search space\nTo run BANANAS on NASBench, download `nasbench_only108.tfrecord` and place it in the top level folder of this repo.\n\n```bash\npython run_experiments_sequential.py\n```\n\nThis will run the BANANAS algorithm against several other NAS algorithms on the NASBench search space.\nTo customize your experiment, open `params.py`. Here, you can change the hyperparameters and the algorithms to run.\nTo run experiments with NAS-Bench-201, download `NAS-Bench-201-v1_0-e61699.pth` and place it in the top level folder of this repo.\nChoose between cifar10, cifar100, and imagenet. For example,\n\n```bash\npython run_experiments_sequential.py --search_space nasbench_201_cifar10\n```\n\n<p align=\"center\">\n<img src=\"img/nasbench_plot.png\" alt=\"nasbench_plot\" width=\"70%\">\n</p>\n\n## Run BANANAS on the DARTS search space\nWe highly recommend using multiple GPUs to run BANANAS on the DARTS search space. You can run BANANAS in parallel on GCP using the shell script:\n\n```bash\nrun_experiments_parallel.sh\n```\n\n## Contributions\nWe welcome community contributions to this repo!\n\n## Citation\nPlease cite [our paper](https://arxiv.org/abs/1910.11858) if you use code from this repo:\n\n```bibtex\n@inproceedings{white2019bananas,\n  title={BANANAS: Bayesian Optimization with Neural Architectures for Neural Architecture Search},\n  author={White, Colin and Neiswanger, Willie and Savani, Yash},\n  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\n  year={2021}\n}\n```\n\n"
  },
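  {
    "path": "examples/path_encoding_demo.py",
    "content": "\"\"\"\nIllustrative sketch (not part of the original repo) of the path-based encoding\ndescribed in the README: featurize a cell by the set of input->output paths,\nwhere each path is identified by the sequence of operations along it. The toy\nop vocabulary, cell, and max_path_len below are assumptions; see the\nnaszilla/naszilla repo for the canonical implementation.\n\"\"\"\nimport itertools\nimport numpy as np\n\nOPS = ['conv3x3', 'conv1x1', 'maxpool']  # assumed op vocabulary\n\ndef path_encoding(adj, ops, max_path_len=3):\n    \"\"\" One-hot vector over all op sequences of length <= max_path_len.\n        adj: upper-triangular adjacency matrix; node 0 is the input node and\n        the last node is the output. ops: op label per intermediate node. \"\"\"\n    n = adj.shape[0]\n    paths = []\n    def dfs(node, path):\n        # Record the op sequence whenever the output node is reached\n        if node == n - 1:\n            paths.append(tuple(path))\n            return\n        for nxt in range(node + 1, n):\n            if adj[node, nxt]:\n                dfs(nxt, path + ([ops[nxt]] if nxt < n - 1 else []))\n    dfs(0, [])\n    # Index space: the empty path plus every op sequence up to max_path_len\n    all_seqs = [()]\n    for length in range(1, max_path_len + 1):\n        all_seqs += list(itertools.product(OPS, repeat=length))\n    index = {seq: i for i, seq in enumerate(all_seqs)}\n    encoding = np.zeros(len(all_seqs))\n    for path in paths:\n        encoding[index[path]] = 1.\n    return encoding\n\nif __name__ == '__main__':\n    # Toy cell: input -> conv3x3 -> output, plus a skip connection\n    adj = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])\n    ops = [None, 'conv3x3', None]\n    print(path_encoding(adj, ops).nonzero()[0])\n"
  },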
  {
    "path": "acquisition_functions.py",
    "content": "import numpy as np\nimport sys\n\n# Different acquisition functions that can be used by BANANAS\ndef acq_fn(predictions, explore_type='its'):\n    predictions = np.array(predictions)\n\n    # Upper confidence bound (UCB) acquisition function\n    if explore_type == 'ucb':\n        explore_factor = 0.5\n        mean = np.mean(predictions, axis=0)\n        std = np.sqrt(np.var(predictions, axis=0))\n        ucb = mean - explore_factor * std\n        sorted_indices = np.argsort(ucb)\n\n    # Expected improvement (EI) acquisition function\n    elif explore_type == 'ei':\n        ei_calibration_factor = 5.\n        mean = list(np.mean(predictions, axis=0))\n        std = list(np.sqrt(np.var(predictions, axis=0)) /\n                   ei_calibration_factor)\n\n        min_y = ytrain.min()\n        gam = [(min_y - mean[i]) / std[i] for i in range(len(mean))]\n        ei = [-1 * std[i] * (gam[i] * norm.cdf(gam[i]) + norm.pdf(gam[i]))\n              for i in range(len(mean))]\n        sorted_indices = np.argsort(ei)\n\n    # Probability of improvement (PI) acquisition function\n    elif explore_type == 'pi':\n        mean = list(np.mean(predictions, axis=0))\n        std = list(np.sqrt(np.var(predictions, axis=0)))\n        min_y = ytrain.min()\n        pi = [-1 * norm.cdf(min_y, loc=mean[i], scale=std[i]) for i in range(len(mean))]\n        sorted_indices = np.argsort(pi)\n\n    # Thompson sampling (TS) acquisition function\n    elif explore_type == 'ts':\n        rand_ind = np.random.randint(predictions.shape[0])\n        ts = predictions[rand_ind,:]\n        sorted_indices = np.argsort(ts)\n\n    # Top exploitation \n    elif explore_type == 'percentile':\n        min_prediction = np.min(predictions, axis=0)\n        sorted_indices = np.argsort(min_prediction)\n\n    # Top mean\n    elif explore_type == 'mean':\n        mean = np.mean(predictions, axis=0)\n        sorted_indices = np.argsort(mean)\n\n    elif explore_type == 'confidence':\n        confidence_factor = 2\n        mean = np.mean(predictions, axis=0)\n        std = np.sqrt(np.var(predictions, axis=0))\n        conf = mean + confidence_factor * std\n        sorted_indices = np.argsort(conf)\n\n    # Independent Thompson sampling (ITS) acquisition function\n    elif explore_type == 'its':\n        mean = np.mean(predictions, axis=0)\n        std = np.sqrt(np.var(predictions, axis=0))\n        samples = np.random.normal(mean, std)\n        sorted_indices = np.argsort(samples)\n\n    else:\n        print('Invalid exploration type in meta neuralnet search', explore_type)\n        sys.exit()\n\n    return sorted_indices"
  },
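  {
    "path": "examples/acq_fn_demo.py",
    "content": "\"\"\"\nMinimal usage sketch (not part of the original repo) for\nacquisition_functions.acq_fn. It builds a fake ensemble of validation-error\npredictions (5 meta-network heads x 100 candidate architectures) and ranks\ncandidates by independent Thompson sampling. The shapes and the ytrain values\nbelow are assumptions made purely for illustration.\n\"\"\"\nimport numpy as np\nfrom acquisition_functions import acq_fn\n\nensemble_size, num_candidates = 5, 100\n# Rows = ensemble members, columns = candidate architectures (lower is better)\npredictions = np.random.uniform(5., 10., size=(ensemble_size, num_candidates))\n# Validation errors of the architectures queried so far (needed by 'ei'/'pi')\nytrain = np.random.uniform(5., 10., size=20)\n\n# 'its' draws one sample from N(mean, std) per candidate and sorts ascending,\n# so the most promising candidates come first\nranking = acq_fn(predictions, ytrain=ytrain, explore_type='its')\nprint('top-5 candidate indices:', ranking[:5])\n"
  },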
  {
    "path": "bo/__init__.py",
    "content": "\"\"\"\nCode for running Bayesian Optimization (BO) in NASzilla.\n\"\"\"\n"
  },
  {
    "path": "bo/acq/__init__.py",
    "content": "\"\"\"\nCode for acquisition strategies.\n\"\"\"\n"
  },
  {
    "path": "bo/acq/acqmap.py",
    "content": "\"\"\"\nClasses to manage acqmap (acquisition maps from xin to acquisition value).\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nimport copy\nfrom bo.acq.acquisition import Acquisitioner\nfrom bo.util.datatransform import DataTransformer\n#from bo.pp.pp_gp_george import GeorgeGpPP\n#from bo.pp.pp_gp_stan import StanGpPP\nfrom bo.pp.pp_gp_my_distmat import MyGpDistmatPP\n\nclass AcqMapper(object):\n  \"\"\" Class to manage acqmap (acquisition map). \"\"\"\n\n  def __init__(self, data, amp, print_flag=True):\n    \"\"\" Constructor\n        Parameters:\n          amp - Namespace of acqmap params\n          print_flag - True or False\n    \"\"\"\n    self.data = data\n    self.set_am_params(amp)\n    #self.setup_acqmap()\n    if print_flag: self.print_str()\n\n  def set_am_params(self, amp):\n    \"\"\" Set the acqmap params.\n        Inputs:\n          amp - Namespace of acqmap parameters \"\"\"\n    self.amp = amp\n\n  def get_acqmap(self, xin_is_list=True):\n    \"\"\" Return acqmap.\n        Inputs: xin_is_list True if input to acqmap is a list of xin \"\"\"\n    # Potentially do acqmap setup here. Could include inference,\n    # cachining/computing quantities, instantiating objects used in acqmap\n    # definition. This becomes important when we do sequential opt of acqmaps.\n    return self.acqmap_list if xin_is_list else self.acqmap_single\n\n  def acqmap_list(self, xin_list):\n    \"\"\" Acqmap defined on a list of xin. \"\"\"\n\n    def get_trans_data():\n      \"\"\" Returns transformed data. \"\"\"\n      dt = DataTransformer(self.data.y, False)\n      return Namespace(X=self.data.X, y=dt.transform_data(self.data.y))\n\n    def apply_acq_to_pmlist(pmlist, acq_str, trans_data):\n      \"\"\" Apply acquisition to pmlist. 
\"\"\"\n      acqp = Namespace(acq_str=acq_str, pmout_str='sample')\n      acq = Acquisitioner(trans_data, acqp, False)\n      acqfn = acq.acq_method\n      return [acqfn(p) for p in pmlist]\n\n    def georgegp_acqmap(acq_str):\n      \"\"\" Acqmaps for GeorgeGpPP \"\"\"\n      trans_data = get_trans_data()\n      pp = GeorgeGpPP(trans_data, self.amp.modelp, False)\n      pmlist = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \\\n        else pp.sample_pp_post_pred(self.amp.nppred, xin_list)\n      return apply_acq_to_pmlist(pmlist, acq_str, trans_data)\n\n    def stangp_acqmap(acq_str):\n      \"\"\" Acqmaps for StanGpPP \"\"\"\n      trans_data = get_trans_data()\n      pp = StanGpPP(trans_data, self.amp.modelp, False)\n      pp.infer_post_and_update_samples(print_result=True)\n      pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \\\n        else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True, \\\n        nloop=np.min([50,self.amp.nppred]))\n      return apply_acq_to_pmlist(pmlist, acq_str, trans_data)\n\n    def mygpdistmat_acqmap(acq_str):\n      \"\"\" Acqmaps for MyGpDistmatPP \"\"\"\n      trans_data = get_trans_data()\n      pp = MyGpDistmatPP(trans_data, self.amp.modelp, False)\n      pp.infer_post_and_update_samples(print_result=True)\n      pmlist, _ = pp.sample_pp_pred(self.amp.nppred, xin_list) if acq_str=='ts' \\\n        else pp.sample_pp_post_pred(self.amp.nppred, xin_list, full_cov=True)\n      return apply_acq_to_pmlist(pmlist, acq_str, trans_data)\n\n    # Mapping of am_str to acqmap\n    if self.amp.am_str=='georgegp_ei':\n      return georgegp_acqmap('ei')\n    elif self.amp.am_str=='georgegp_pi':\n      return georgegp_acqmap('pi')\n    elif self.amp.am_str=='georgegp_ucb':\n      return georgegp_acqmap('ucb')\n    elif self.amp.am_str=='georgegp_ts':\n      return georgegp_acqmap('ts')\n    elif self.amp.am_str=='stangp_ei':\n      return stangp_acqmap('ei')\n    elif self.amp.am_str=='stangp_pi':\n      return stangp_acqmap('pi')\n    elif self.amp.am_str=='stangp_ucb':\n      return stangp_acqmap('ucb')\n    elif self.amp.am_str=='stangp_ts':\n      return stangp_acqmap('ts')\n    elif self.amp.am_str=='mygpdistmat_ei':\n      return mygpdistmat_acqmap('ei')\n    elif self.amp.am_str=='mygpdistmat_pi':\n      return mygpdistmat_acqmap('pi')\n    elif self.amp.am_str=='mygpdistmat_ucb':\n      return mygpdistmat_acqmap('ucb')\n    elif self.amp.am_str=='mygpdistmat_ts':\n      return mygpdistmat_acqmap('ts')\n    elif self.amp.am_str=='null':\n      return [0. for xin in xin_list]\n\n  def acqmap_single(self, xin):\n    \"\"\" Acqmap defined on a single xin. Returns acqmap(xin) value, not list. \"\"\"\n    return self.acqmap_list([xin])[0]\n\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*AcqMapper with amp='+str(self.amp)\n      +'.\\n-----')\n"
  },
  {
    "path": "bo/acq/acqopt.py",
    "content": "\"\"\"\nClasses to perform acquisition function optimization.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\n\nclass AcqOptimizer(object):\n  \"\"\" Class to perform acquisition function optimization \"\"\"\n\n  def __init__(self, optp=None, print_flag=True):\n    \"\"\" Constructor\n        Inputs:\n          optp - Namespace of opt parameters\n          print_flag - True or False\n    \"\"\"\n    self.set_opt_params(optp)\n    if print_flag: self.print_str()\n\n  def set_opt_params(self, optp):\n    \"\"\" Set the optimizer params.\n        Inputs:\n          acqp - Namespace of acquisition parameters \"\"\"\n    if optp is None:\n      optp = Namespace(opt_str='rand', max_iter=1000)\n    self.optp = optp\n\n  def optimize(self, dom, am):\n    \"\"\" Optimize acqfn(probmap(x)) over x in domain \"\"\"\n    if self.optp.opt_str=='rand':\n      return self.optimize_rand(dom, am)\n\n  def optimize_rand(self, dom, am):\n    \"\"\" Optimize acqmap(x) over domain via random search \"\"\"\n    xin_list = dom.unif_rand_sample(self.optp.max_iter)\n    amlist = am.acqmap_list(xin_list)\n    return xin_list[np.argmin(amlist)]\n\n  # Utilities \n  def print_str(self):\n    \"\"\" print a description string \"\"\"\n    print('*AcqOptimizer with optp='+str(self.optp)\n      +'.\\n-----')\n"
  },
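  {
    "path": "examples/acqopt_demo.py",
    "content": "\"\"\"\nToy end-to-end sketch (not part of the original repo) of AcqOptimizer.optimize:\nrandom search over a RealDomain with a stand-in acqmap object. Anything that\nexposes acqmap_list works here; the quadratic 'acquisition' below is an\nassumption used purely to exercise the interface.\n\"\"\"\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.dom.real import RealDomain\nfrom bo.acq.acqopt import AcqOptimizer\n\nclass ToyAcqMap:\n    \"\"\" Stand-in for AcqMapper: maps a batch of points to acquisition values. \"\"\"\n    def acqmap_list(self, xin_list):\n        return [float(np.sum(np.square(x - 0.3))) for x in xin_list]\n\ndom = RealDomain(Namespace(ndimx=2, min_max=[(0, 1)] * 2), printFlag=False)\nopt = AcqOptimizer(Namespace(opt_str='rand', max_iter=500), print_flag=False)\n# optimize_rand samples 500 points and returns the acqmap minimizer\nprint('argmin (should be near (0.3, 0.3)):', opt.optimize(dom, ToyAcqMap()))\n"
  },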
  {
    "path": "bo/acq/acquisition.py",
    "content": "\"\"\"\nClasses to manage acquisition functions.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom scipy.stats import norm\n\nclass Acquisitioner(object):\n  \"\"\" Class to manage acquisition functions \"\"\"\n\n  def __init__(self, data, acqp=None, print_flag=True):\n    \"\"\" Constructor\n        Parameters:\n          acqp - Namespace of acquisition parameters\n          print_flag - True or False\n    \"\"\"\n    self.data = data\n    self.set_acq_params(acqp)\n    self.set_acq_method()\n    if print_flag: self.print_str()\n\n  def set_acq_params(self, acqp):\n    \"\"\" Set the acquisition params.\n        Parameters:\n          acqp - Namespace of acquisition parameters \"\"\"\n    if acqp is None:\n      acqp = Namespace(acq_str='ei', pmout_str='sample')\n    self.acqp = acqp\n\n  def set_acq_method(self):\n    \"\"\" Set the acquisition method \"\"\"\n    if self.acqp.acq_str=='ei': self.acq_method = self.ei\n    if self.acqp.acq_str=='pi': self.acq_method = self.pi\n    if self.acqp.acq_str=='ts': self.acq_method = self.ts\n    if self.acqp.acq_str=='ucb': self.acq_method = self.ucb\n    if self.acqp.acq_str=='rand': self.acq_method = self.rand\n    if self.acqp.acq_str=='null': self.acq_method = self.null\n    #if self.acqp.acqStr=='map': return self.map\n\n  def ei(self, pmout):\n    \"\"\" Expected improvement (EI) \"\"\"\n    if self.acqp.pmout_str=='sample':\n      return self.bbacq_ei(pmout)\n\n  def pi(self, pmout):\n    \"\"\" Probability of improvement (PI) \"\"\"\n    if self.acqp.pmout_str=='sample':\n      return self.bbacq_pi(pmout)\n\n  def ucb(self, pmout):\n    \"\"\" Upper (lower) confidence bound (UCB) \"\"\"\n    if self.acqp.pmout_str=='sample':\n      return self.bbacq_ucb(pmout)\n\n  def ts(self, pmout):\n    \"\"\" Thompson sampling (TS) \"\"\"\n    if self.acqp.pmout_str=='sample':\n      return self.bbacq_ts(pmout)\n\n  def rand(self, pmout):\n    \"\"\" Uniform random sampling \"\"\"\n    return np.random.random()\n\n  def null(self, pmout):\n    \"\"\" Return constant 0. 
\"\"\"\n    return 0.\n\n  # Black Box Acquisition Functions\n  def bbacq_ei(self, pmout_samp, normal=False):\n    \"\"\" Black box acquisition: BB-EI\n        Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))\n        Returns: EI acq value \"\"\"\n    youts = np.array(pmout_samp).flatten()\n    nsamp = youts.shape[0]\n    if normal:\n      mu = np.mean(youts)\n      sig = np.std(youts)\n      gam = (self.data.y.min() - mu) / sig\n      eiVal = -1*sig*(gam*norm.cdf(gam) + norm.pdf(gam))\n    else:\n      diffs = self.data.y.min() - youts\n      ind_below_min = np.argwhere(diffs>0)\n      eiVal = -1*np.sum(diffs[ind_below_min])/float(nsamp) if \\\n        len(ind_below_min)>0 else 0\n    return eiVal\n\n  def bbacq_pi(self, pmout_samp, normal=False):\n    \"\"\" Black box acquisition: BB-PI\n        Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))\n        Returns: PI acq value \"\"\"\n    youts = np.array(pmout_samp).flatten()\n    nsamp = youts.shape[0]\n    if normal:\n      mu = np.mean(youts)\n      sig = np.sqrt(np.var(youts))\n      piVal = -1*norm.cdf(self.data.y.min(),loc=mu,scale=sig)\n    else:\n      piVal = -1*len(np.argwhere(youts<self.data.y.min()))/float(nsamp)\n    return piVal\n\n  def bbacq_ucb(self, pmout_samp, beta=0.5, normal=True):\n    \"\"\" Black box acquisition: BB-UCB\n        Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))\n        Returns: UCB acq value \"\"\"\n    youts = np.array(pmout_samp).flatten()\n    nsamp = youts.shape[0]\n    if normal:\n      ucbVal = np.mean(youts) - beta*np.sqrt(np.var(youts))\n    else:\n      # TODO replace below with nonparametric ucb estimate\n      ucbVal = np.mean(youts) - beta*np.sqrt(np.var(youts))\n    return ucbVal\n\n  def bbacq_ts(self, pmout_samp):\n    \"\"\" Black box acquisition: BB-TS\n        Input: pmout_samp: post-pred samples - np array (shape=(nsamp,1))\n        Returns: TS acq value \"\"\"\n    return pmout_samp.mean()\n\n  # Utilities\n  def print_str(self):\n    \"\"\" print a description string \"\"\"\n    print('*Acquisitioner with acqp='+str(self.acqp)+'.')\n    print('-----')\n"
  },
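  {
    "path": "examples/acquisitioner_demo.py",
    "content": "\"\"\"\nUsage sketch (not part of the original repo) for bo.acq.acquisition: score a\nbatch of posterior-predictive sample vectors with black-box EI. data.y holds\nalready-observed (minimized) values and each pmout is an (nsamp, 1) array of\npredictive samples for one candidate; all numbers are made up for illustration.\n\"\"\"\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.acq.acquisition import Acquisitioner\n\ndata = Namespace(X=np.random.rand(10, 2), y=np.random.rand(10, 1))\nacq = Acquisitioner(data, Namespace(acq_str='ei', pmout_str='sample'),\n                    print_flag=False)\n# Five candidates, each with 50 posterior-predictive samples\npmlist = [np.random.rand(50, 1) for _ in range(5)]\nscores = [acq.acq_method(pm) for pm in pmlist]\nprint('BB-EI scores (more negative = better):', scores)\n"
  },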
  {
    "path": "bo/bo/__init__.py",
    "content": "\"\"\"\nCode for Bayesian optimization.\n\"\"\"\n"
  },
  {
    "path": "bo/bo/probo.py",
    "content": "\"\"\"\nClasses for ProBO (probabilistic programming BO) using makept strategy.\n\"\"\"\n\nimport time\nfrom argparse import Namespace\nimport subprocess\nimport os\nimport pickle\nimport numpy as np\nfrom bo.fn.functionhandler import get_fh\nfrom bo.ds.makept import main\n\nclass ProBO(object):\n  \"\"\" Class to carry out ProBO (probabilistic programming BO) \"\"\"\n\n  def __init__(self, fn, search_space, aux_file_path, data=None, probop=None, printFlag=True):\n    \"\"\" Constructor\n        Parameters:\n          fn - Function to query (experiment)\n          data - Initial dataset Namespace (with keys: X, y)\n          probop - probo parameters Namespace\n    \"\"\"\n    self.data = data\n    self.search_space = search_space\n    self.set_probo_params(probop)\n    self.set_fh(fn)\n    self.set_tmpdir()\n    self.auxpkl = aux_file_path\n    if printFlag:\n      self.print_str()\n\n  def set_probo_params(self, probop):\n    \"\"\" Set ProBO parameters \"\"\"\n    self.probop = probop\n\n  def set_fh(self, fn):\n    \"\"\" Set function handler \"\"\"\n    self.fh = get_fh(fn, self.data, self.probop.fhp)\n\n  def set_tmpdir(self):\n    \"\"\" Set tmp directory and files \"\"\"\n    if not os.path.exists(self.probop.tmpdir):\n      os.makedirs(self.probop.tmpdir)\n    self.configpkl = os.path.join(self.probop.tmpdir, 'config.pkl')\n    self.datapkl = os.path.join(self.probop.tmpdir, 'data.pkl')\n    self.nextptpkl = os.path.join(self.probop.tmpdir, 'nextpt.pkl')\n\n  def run_bo(self, verbose=False):\n    \"\"\" Main BO loop. \"\"\"\n    # Serialize makerp \n    with open(self.configpkl, 'wb') as f:\n      pickle.dump(self.probop.makerp, f)\n    print('*Saved self.probop.makerp as ' + self.configpkl + '.\\n-----')\n    # Iterate\n    for iteridx in range(self.probop.niter):\n      starttime = time.time()\n      # Serialize current data\n      with open(self.datapkl, 'wb') as f:\n        pickle.dump(self.data, f)\n\n      if not hasattr(self.probop, 'mode') or self.probop.mode == 'subprocess':\n        subseed = np.random.randint(111111)\n        subprocess.call(['python3', 'bo/ds/makept.py', '--configpkl', self.configpkl,\n                         '--datapkl', self.datapkl, '--nextptpkl',\n                         self.nextptpkl, '--seed', str(subseed)])\n      elif self.probop.mode == 'single_process':\n        args = Namespace(configpkl=self.configpkl, datapkl=self.datapkl, nextptpkl=self.nextptpkl,\n            mode=self.probop.mode, iteridx=iteridx)\n        main(args, self.search_space)\n\n      # Call fn on nextpt\n      nextpt = pickle.load(open(self.nextptpkl, 'rb'))\n      self.fh.call_fn_and_add_data(nextpt)\n      print('FINISHED QUERY', iteridx)\n      if verbose and iteridx % 10 == 0:\n        print('iter', iteridx)\n        print('Data is:')\n        print(self.data.y)\n      itertime = time.time()-starttime\n      if iteridx % 10 == 0:\n        self.print_iter_info(iteridx, itertime)\n      self.post_iteration()\n\n  def print_iter_info(self, iteridx, itertime):\n    \"\"\" Print information at end of an iteration. 
\"\"\"\n    print('*Last query results: xin = ' + str(self.data.X[-1]) +\n          ', yout = ' + str(self.data.y[-1]) + '.')\n    print('*Timing: iteration took ' + str(itertime) + ' seconds.')\n    print('*Finished ProBO iter = ' + str(iteridx+1) +\n          '.\\n' + '==='*20)\n\n  def print_str(self):\n    \"\"\" print a description string \"\"\"\n    print('*ProBO (using makept) with probop='+str(self.probop)\n          + '.\\n-----')\n\n  def post_iteration(self):\n    pairs = [(self.data.X[i], self.data.y[i]) for i in range(len(self.data.y))]\n    pairs.sort(key = lambda x:x[1])\n    with open(self.auxpkl, 'wb') as f:\n      pickle.dump(pairs, f)\n\n\n\n"
  },
  {
    "path": "bo/dom/__init__.py",
    "content": "\"\"\"\nCode for domain classes.\n\"\"\"\n"
  },
  {
    "path": "bo/dom/list.py",
    "content": "\"\"\"\nClasses for list (discrete set) domains.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\n\n\nclass ListDomain(object):\n  \"\"\" Class for defining sets defined by a list of elements \"\"\"\n\n  def __init__(self, search_space, domp=None, printFlag=True):\n    \"\"\" Constructor\n        Parameters:\n          domp - domain parameters Namespace\n    \"\"\"\n    self.set_domain_params(domp)\n    self.search_space = search_space\n    self.init_domain_list()\n    if printFlag:\n      self.print_str()\n\n  def set_domain_params(self, domp):\n    \"\"\" Set self.domp Namespace \"\"\"\n    self.domp = domp\n\n  def init_domain_list(self):\n    \"\"\" Initialize self.domain_list. \"\"\"\n    if self.domp.set_domain_list_auto:\n      self.set_domain_list_auto()\n    else:\n      self.domain_list = None\n\n  def set_domain_list_auto(self):\n    self.domain_list = self.search_space.get_arch_list(self.domp.aux_file_path)\n\n  def set_domain_list(self, domain_list):\n    \"\"\" Set self.domain_list, containing elements of domain \"\"\"\n    self.domain_list = domain_list\n\n  def is_in_domain(self, pt):\n    \"\"\" Check if pt is in domain, and return True or False \"\"\"\n    return pt in self.domain_list\n\n  def unif_rand_sample(self, n=1, replace=True):\n    \"\"\" Draws a sample uniformly at random from domain, returns as a list of\n        len n, with (default) or without replacement. \"\"\"\n    if replace:\n      randind = np.random.randint(len(self.domain_list), size=n)\n    else:\n      randind = np.arange(min(n, len(self.domain_list)))\n    return [self.domain_list[i] for i in randind]\n\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*ListDomain with domp = ' + str(self.domp) + '.')\n    print('-----')\n"
  },
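  {
    "path": "examples/listdomain_demo.py",
    "content": "\"\"\"\nUsage sketch (not part of the original repo) for bo.dom.list.ListDomain. The\nsearch_space argument only needs a get_arch_list(aux_file_path) method, so a\nsmall stub suffices; the 'arch_i' strings stand in for real architecture\nencodings.\n\"\"\"\nfrom argparse import Namespace\nfrom bo.dom.list import ListDomain\n\nclass StubSearchSpace:\n    def get_arch_list(self, aux_file_path):\n        return ['arch_%d' % i for i in range(10)]\n\ndomp = Namespace(dom_str='list', set_domain_list_auto=True, aux_file_path=None)\ndom = ListDomain(StubSearchSpace(), domp, printFlag=False)\nprint(dom.unif_rand_sample(3))                 # with replacement\nprint(dom.unif_rand_sample(3, replace=False))  # distinct elements\n"
  },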
  {
    "path": "bo/dom/real.py",
    "content": "\"\"\"\nClasses for real coordinate space domains.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\n\nclass RealDomain(object):\n  \"\"\" Class for defining sets in real coordinate (Euclidean) space \"\"\"\n\n  def __init__(self, domp=None, printFlag=True):\n    \"\"\" Constructor\n        Parameters:\n          domp - domain parameters Namespace\n    \"\"\"\n    self.set_domain_params(domp)\n    self.ndimx = self.domp.ndimx\n    if printFlag:\n      self.print_str()\n\n  def set_domain_params(self, domp):\n    \"\"\" Set self.domp Namespace \"\"\"\n    if domp is None:\n      domp = Namespace()\n      domp.ndimx = 1\n      domp.min_max = [(0,1)]*domp.ndimx\n    self.domp = domp\n\n  def is_in_domain(self, pt):\n    \"\"\" Check if pt is in domain, and return True or False \"\"\"\n    pt = np.array(pt).reshape(-1)\n    if pt.shape[0] != self.ndimx:\n      ret=False\n    else:\n      bool_list = [pt[i]>=self.domp.min_max[i][0] and\n        pt[i]<=self.domp.min_max[i][1] for i in range(self.ndimx)]\n      ret=False if False in bool_list else True\n    return ret\n\n  def unif_rand_sample(self, n=1):\n    \"\"\" Draws a sample uniformly at random from domain \"\"\"\n    li = [np.random.uniform(mm[0], mm[1], n) for mm in self.domp.min_max]\n    return np.array(li).T\n\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*RealDomain with domp = ' + str(self.domp) + '.')\n    print('-----')\n"
  },
  {
    "path": "bo/ds/__init__.py",
    "content": "\"\"\"\nCode for makept (serializing and subprocesses) strategy.\n\"\"\"\n"
  },
  {
    "path": "bo/ds/makept.py",
    "content": "\"\"\"\nMake a point in a domain, and serialize it.\n\"\"\"\n\nimport sys\nimport os\nsys.path.append(os.path.expanduser('./'))\nfrom argparse import Namespace, ArgumentParser\nimport pickle\nimport time\nimport numpy as np\nfrom bo.dom.real import RealDomain\nfrom bo.dom.list import ListDomain\nfrom bo.acq.acqmap import AcqMapper\nfrom bo.acq.acqopt import AcqOptimizer\n\ndef main(args, search_space, printinfo=False):\n  starttime = time.time()\n  \n  # Load config and data\n  makerp = pickle.load(open(args.configpkl, 'rb'))\n  data = pickle.load(open(args.datapkl, 'rb'))\n\n  if hasattr(args, 'mode') and args.mode == 'single_process':\n    makerp.domp.mode = args.mode\n    makerp.domp.iteridx = args.iteridx\n    makerp.amp.modelp.mode = args.mode\n  else:\n    np.random.seed(args.seed)\n  # Instantiate Domain, AcqMapper, AcqOptimizer\n  dom = get_domain(makerp.domp, search_space)\n  am = AcqMapper(data, makerp.amp, False)\n  ao = AcqOptimizer(makerp.optp, False)\n  # Optimize over domain to get nextpt \n  nextpt = ao.optimize(dom, am)\n  # Serialize nextpt\n  with open(args.nextptpkl, 'wb') as f:\n    pickle.dump(nextpt, f)\n  # Print\n  itertime = time.time()-starttime\n  if printinfo: print_info(nextpt, itertime, args.nextptpkl)\n\ndef get_domain(domp, search_space):\n  \"\"\" Return Domain object. \"\"\"\n  if not hasattr(domp, 'dom_str'):\n    domp.dom_str = 'real'\n  if domp.dom_str=='real':\n    return RealDomain(domp, False)\n  elif domp.dom_str=='list':\n    return ListDomain(search_space, domp, False)\n\ndef print_info(nextpt, itertime, nextptpkl):\n  print('*Found nextpt = ' + str(nextpt) + '.')\n  print('*Saved nextpt as ' + nextptpkl + '.')\n  print('*Timing: makept took ' + str(itertime) + ' seconds.')\n  print('-----')\n\nif __name__ == \"__main__\":\n  parser = ArgumentParser(description='Args for a single instance of acquisition optimization.')\n  parser.add_argument('--seed', dest='seed', type=int, default=1111)\n  parser.add_argument('--configpkl', dest='configpkl', type=str, default='config.pkl')\n  parser.add_argument('--datapkl', dest='datapkl', type=str, default='data.pkl')\n  parser.add_argument('--nextptpkl', dest='nextptpkl', type=str, default='nextpt.pkl')\n  args = parser.parse_args()\n  main(args, printinfo=False)\n"
  },
  {
    "path": "bo/fn/__init__.py",
    "content": "\"\"\"\nCode for synthetic functions to query (perform experiment on).\n\"\"\"\n"
  },
  {
    "path": "bo/fn/functionhandler.py",
    "content": "\"\"\"\nClasses to handle functions.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\n\ndef get_fh(fn, data=None, fhp=None, print_flag=True):\n  \"\"\" Returns a function handler object \"\"\"\n  if fhp is None:\n    fhp=Namespace(fhstr='basic', namestr='noname')\n  # Return FH object\n  if fhp.fhstr=='basic':\n    return BasicFH(fn, data, fhp, print_flag)\n  elif fhp.fhstr=='extrainfo':\n    return ExtraInfoFH(fn, data, fhp, print_flag)\n  elif fhp.fhstr=='nannn':\n    return NanNNFH(fn, data, fhp, print_flag)\n  elif fhp.fhstr=='replacenannn':\n    return ReplaceNanNNFH(fn, data, fhp, print_flag)\n  elif fhp.fhstr=='object':\n    return ObjectFH(fn, data, fhp, print_flag)\n\n\nclass BasicFH(object):\n  \"\"\" Class to handle basic functions, which map from an array xin to a real\n      value yout. \"\"\"\n\n  def __init__(self, fn, data=None, fhp=None, print_flag=True):\n    \"\"\" Constructor.\n        Inputs:\n          pmp - Namespace of probmap params\n          print_flag - True or False\n    \"\"\"\n    self.fn = fn\n    self.data = data\n    self.fhp = fhp\n    if print_flag: self.print_str()\n\n  def call_fn_and_add_data(self, xin):\n    \"\"\" Call self.fn(xin), and update self.data \"\"\"\n    yout = self.fn(xin)\n    print('new datapoint score', yout)\n    self.add_data_single(xin, yout)\n\n  def add_data_single(self, xin, yout):\n    \"\"\" Update self.data with a single xin yout pair.\n        Inputs:\n          xin: np.array size=(1, -1)\n          yout: np.array size=(1, 1) \"\"\"\n    xin = np.array(xin).reshape(1, -1)\n    yout = np.array(yout).reshape(1, 1)\n    newdata = Namespace(X=xin, y=yout)\n    self.add_data(newdata)\n\n  def add_data(self, newdata):\n    \"\"\" Update self.data with newdata Namespace.\n        Inputs:\n          newdata: Namespace with fields X and y \"\"\"\n    if self.data is None:\n      self.data = newdata\n    else:\n      self.data.X = np.concatenate((self.data.X, newdata.X), 0)\n      self.data.y = np.concatenate((self.data.y, newdata.y), 0)\n\n  def print_str(self):\n    \"\"\" Print a description string. \"\"\"\n    print('*BasicFH with fhp='+str(self.fhp)\n      +'.\\n-----')\n\n\nclass ExtraInfoFH(BasicFH):\n  \"\"\" Class to handle functions that map from an array xin to a real\n      value yout, but also return extra info \"\"\"\n\n  def __init__(self, fn, data=None, fhp=None, print_flag=True):\n    \"\"\" Constructor.\n        Inputs:\n          pmp - Namespace of probmap params\n          print_flag - True or False\n    \"\"\"\n    super(ExtraInfoFH, self).__init__(fn, data, fhp, False)\n    self.extrainfo = []\n    if print_flag: self.print_str()\n\n  def call_fn_and_add_data(self, xin):\n    \"\"\" Call self.fn(xin), and update self.data \"\"\"\n    yout, exinf = self.fn(xin)\n    self.add_data_single(xin, yout)\n    self.extrainfo.append(exinf)\n\n  def print_str(self):\n    \"\"\" Print a description string. 
\"\"\"\n    print('*ExtraInfoFH with fhp='+str(self.fhp)\n      +'.\\n-----')\n\n\nclass NanNNFH(BasicFH):\n  \"\"\" Class to handle NN functions that map from an array xin to either\n      a real value yout or np.NaN, but also return extra info \"\"\"\n\n  def __init__(self, fn, data=None, fhp=None, print_flag=True):\n    \"\"\" Constructor.\n        Inputs:\n          pmp - Namespace of probmap params\n          print_flag - True or False\n    \"\"\"\n    super(NanNNFH, self).__init__(fn, data, fhp, False)\n    self.extrainfo = []\n    if print_flag: self.print_str()\n\n  def call_fn_and_add_data(self, xin):\n    \"\"\" Call self.fn(xin), and update self.data \"\"\"\n    timethresh = 60.\n    yout, walltime = self.fn(xin)\n    if walltime > timethresh:\n      self.add_data_single_nan(xin)\n    else:\n      self.add_data_single(xin, yout)\n      self.possibly_init_xnan()\n    exinf = Namespace(xin=xin, yout=yout, walltime=walltime)\n    self.extrainfo.append(exinf)\n\n  def add_data_single_nan(self, xin):\n    \"\"\" Update self.data.X_nan with a single xin.\n        Inputs:\n          xin: np.array size=(1, -1) \"\"\"\n    xin = xin.reshape(1,-1)\n    newdata = Namespace(X = np.ones((0, xin.shape[1])),\n                        y = np.ones((0, 1)),\n                        X_nan = xin)\n    self.add_data_nan(newdata)\n\n  def add_data_nan(self, newdata):\n    \"\"\" Update self.data with newdata Namespace.\n        Inputs:\n          newdata: Namespace with fields X, y, X_nan \"\"\"\n    if self.data is None:\n      self.data = newdata\n    else:\n      self.data.X_nan = np.concatenate((self.data.X_nan, newdata.X_nan), 0)\n\n  def possibly_init_xnan(self):\n    \"\"\" If self.data doesn't have X_nan, then create it. \"\"\"\n    if not hasattr(self.data, 'X_nan'):\n      self.data.X_nan = np.ones((0, self.data.X.shape[1]))\n\n  def print_str(self):\n    \"\"\" Print a description string. \"\"\"\n    print('*NanNNFH with fhp='+str(self.fhp)\n      +'.\\n-----')\n\n\nclass ReplaceNanNNFH(BasicFH):\n  \"\"\" Class to handle NN functions that map from an array xin to either\n      a real value yout or np.NaN. If np.NaN, we replace it with a large\n      positive value. We also return extra info \"\"\"\n\n  def __init__(self, fn, data=None, fhp=None, print_flag=True):\n    \"\"\" Constructor.\n        Inputs:\n          pmp - Namespace of probmap params\n          print_flag - True or False\n    \"\"\"\n    super(ReplaceNanNNFH, self).__init__(fn, data, fhp, False)\n    self.extrainfo = []\n    if print_flag: self.print_str()\n\n  def call_fn_and_add_data(self, xin):\n    \"\"\" Call self.fn(xin), and update self.data \"\"\"\n    timethresh = 60.\n    replace_nan_val = 5.\n    yout, walltime = self.fn(xin)\n    if walltime > timethresh:\n      yout = replace_nan_val\n    self.add_data_single(xin, yout)\n    exinf = Namespace(xin=xin, yout=yout, walltime=walltime)\n    self.extrainfo.append(exinf)\n\n  def print_str(self):\n    \"\"\" Print a description string. \"\"\"\n    print('*ReplaceNanNNFH with fhp='+str(self.fhp)\n      +'.\\n-----')\n\n\nclass ObjectFH(object):\n  \"\"\" Class to handle basic functions, which map from some object xin to a real\n      value yout. 
\"\"\"\n\n  def __init__(self, fn, data=None, fhp=None, print_flag=True):\n    \"\"\" Constructor.\n        Inputs:\n          pmp - Namespace of probmap params\n          print_flag - True or False\n    \"\"\"\n    self.fn = fn\n    self.data = data\n    self.fhp = fhp\n    if print_flag: self.print_str()\n\n  def call_fn_and_add_data(self, xin):\n    \"\"\" Call self.fn(xin), and update self.data \"\"\"\n    yout = self.fn(xin)\n    self.add_data_single(xin, yout)\n\n  def add_data_single(self, xin, yout):\n    \"\"\" Update self.data with a single xin yout pair. \"\"\"\n    newdata = Namespace(X=[xin], y=np.array(yout).reshape(1, 1))\n    self.add_data(newdata)\n\n  def add_data(self, newdata):\n    \"\"\" Update self.data with newdata Namespace.\n        Inputs:\n          newdata: Namespace with fields X and y \"\"\"\n    if self.data is None:\n      self.data = newdata\n    else:\n      self.data.X.extend(newdata.X)\n      self.data.y = np.concatenate((self.data.y, newdata.y), 0)\n\n  def print_str(self):\n    \"\"\" Print a description string. \"\"\"\n    print('*ObjectFH with fhp='+str(self.fhp)\n      +'.\\n-----')\n"
  },
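  {
    "path": "examples/functionhandler_demo.py",
    "content": "\"\"\"\nUsage sketch (not part of the original repo) for bo.fn.functionhandler.get_fh:\nwrap a plain objective in a BasicFH and let it accumulate (X, y) data as points\nare queried. The quadratic objective is an assumption for illustration.\n\"\"\"\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.fn.functionhandler import get_fh\n\nfh = get_fh(lambda x: float(np.sum(np.square(x))),\n            fhp=Namespace(fhstr='basic', namestr='toy'), print_flag=False)\nfor _ in range(3):\n    fh.call_fn_and_add_data(np.random.rand(2))\nprint(fh.data.X.shape, fh.data.y.shape)  # (3, 2) (3, 1)\n"
  },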
  {
    "path": "bo/pp/__init__.py",
    "content": "\"\"\"\nCode for defining and running probabilistic programs.\n\"\"\"\n"
  },
  {
    "path": "bo/pp/gp/__init__.py",
    "content": "\"\"\"\nCode for Gaussian process (GP) utilities and functions.\n\"\"\"\n"
  },
  {
    "path": "bo/pp/gp/gp_utils.py",
    "content": "\"\"\"\nUtilities for Gaussian process (GP) inference\n\"\"\"\n\nimport numpy as np\nfrom scipy.linalg import solve_triangular\nfrom scipy.spatial.distance import cdist \n#import GPy as gpy\n\n\ndef kern_gibbscontext(xmatcon1, xmatcon2, xmatact1, xmatact2, theta, alpha,\n  lscon, whichlsfn=1):\n  \"\"\" Gibbs kernel (ls_fn of context only) \"\"\"\n  actdim = xmatact1.shape[1]\n  lsarr1 = ls_fn(xmatcon1, theta, whichlsfn).flatten()\n  lsarr2 = ls_fn(xmatcon2, theta, whichlsfn).flatten()\n  sum_sq_ls = np.add.outer(lsarr1, lsarr2)\n  inexp = -1. * np.divide(cdist(xmatact1, xmatact2, 'sqeuclidean'), sum_sq_ls)\n  prod_ls = np.outer(lsarr1, lsarr2)\n  #coef = np.power(np.divide(2*prod_ls, sum_sq_ls), actdim/2.) # Correct\n  coef = 1.\n  kern_gibbscontext_only_ns = np.multiply(coef, np.exp(inexp))\n  kern_expquad_ns = kern_exp_quad_noscale(xmatcon1, xmatcon2, lscon)\n  return alpha**2 * np.multiply(kern_gibbscontext_only_ns, kern_expquad_ns)\n\ndef kern_gibbs1d(xmat1, xmat2, theta, alpha):\n  \"\"\" Gibbs kernel in 1d \"\"\"\n  lsarr1 = ls_fn(xmat1, theta).flatten()\n  lsarr2 = ls_fn(xmat2, theta).flatten()\n  sum_sq_ls = np.add.outer(lsarr1, lsarr2)\n  prod_ls = np.outer(lsarr1, lsarr2) #TODO product of this for each dim\n  coef = np.sqrt(np.divide(2*prod_ls, sum_sq_ls))\n  inexp = cdist(xmat1, xmat2, 'sqeuclidean') / sum_sq_ls #TODO sum of this for each dim\n  return alpha**2 * coef * np.exp(-1 * inexp)\n\ndef ls_fn(xmat, theta, whichlsfn=1):\n  theta = np.array(theta).reshape(-1,1)\n  if theta.shape[0]==2:\n    if whichlsfn==1 or whichlsfn==2:\n      return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1])))   # softplus transform\n    elif whichlsfn==3:\n      return np.exp(theta[0][0] + np.matmul(xmat,theta[1]))               # exp transform\n  elif theta.shape[0]==3:\n    if whichlsfn==1:\n      return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +\n        np.matmul(np.power(xmat,2),theta[2])))                            # softplus transform\n    elif whichlsfn==2:\n      return np.log(1 + np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +\n        np.matmul(np.abs(xmat),theta[2])))                                # softplus on abs transform\n    elif whichlsfn==3:\n      return np.exp(theta[0][0] + np.matmul(xmat,theta[1]) +\n        np.matmul(np.power(xmat,2),theta[2]))                             # exp transform\n  else:\n    print('ERROR: theta parameter is incorrect.')\n\ndef kern_matern32(xmat1, xmat2, ls, alpha):\n  \"\"\" Matern 3/2 kernel, currently using GPy \"\"\"\n  kern = gpy.kern.Matern32(input_dim=xmat1.shape[1], variance=alpha**2,\n    lengthscale=ls)\n  return kern.K(xmat1,xmat2)\n\ndef kern_exp_quad(xmat1, xmat2, ls, alpha):\n  \"\"\" Exponentiated quadratic kernel function aka squared exponential kernel\n      aka RBF kernel \"\"\"\n  return alpha**2 * kern_exp_quad_noscale(xmat1, xmat2, ls)\n\ndef kern_exp_quad_noscale(xmat1, xmat2, ls):\n  \"\"\" Exponentiated quadratic kernel function aka squared exponential kernel\n      aka RBF kernel, without scale parameter. \"\"\"\n  sq_norm = (-1/(2 * ls**2)) * cdist(xmat1, xmat2, 'sqeuclidean')\n  return np.exp(sq_norm)\n\ndef squared_euc_distmat(xmat1, xmat2, coef=1.):\n  \"\"\" Distance matrix of squared euclidean distance (multiplied by coef)\n      between points in xmat1 and xmat2. 
\"\"\"\n  return coef * cdist(xmat1, xmat2, 'sqeuclidean')\n\ndef kern_distmat(xmat1, xmat2, ls, alpha, distfn):\n  \"\"\" Kernel for a given distmat, via passed-in distfn (which is assumed to be\n      fn of xmat1 and xmat2 only) \"\"\"\n  distmat = distfn(xmat1, xmat2)\n  sq_norm = -distmat / ls**2\n  return alpha**2 * np.exp(sq_norm)\n\ndef get_cholesky_decomp(k11_nonoise, sigma, psd_str):\n  \"\"\" Returns cholesky decomposition \"\"\"\n  if psd_str == 'try_first':\n    k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0])\n    try:\n      return stable_cholesky(k11, False)\n    except np.linalg.linalg.LinAlgError:\n      return get_cholesky_decomp(k11_nonoise, sigma, 'project_first')\n  elif psd_str == 'project_first':\n    k11_nonoise = project_symmetric_to_psd_cone(k11_nonoise)\n    return get_cholesky_decomp(k11_nonoise, sigma, 'is_psd')\n  elif psd_str == 'is_psd':\n    k11 = k11_nonoise + sigma**2 * np.eye(k11_nonoise.shape[0])\n    return stable_cholesky(k11)\n\ndef stable_cholesky(mmat, make_psd=True):\n  \"\"\" Returns a 'stable' cholesky decomposition of mmat \"\"\"\n  if mmat.size == 0:\n    return mmat\n  try:\n    lmat = np.linalg.cholesky(mmat)\n  except np.linalg.linalg.LinAlgError as e:\n    if not make_psd:\n      raise e\n    diag_noise_power = -11\n    max_mmat = np.diag(mmat).max()\n    diag_noise = np.diag(mmat).max() * 1e-11\n    break_loop = False\n    while not break_loop:\n      try:\n        lmat = np.linalg.cholesky(mmat + ((10**diag_noise_power) * max_mmat)  *\n          np.eye(mmat.shape[0]))\n        break_loop = True\n      except np.linalg.linalg.LinAlgError:\n        if diag_noise_power > -9:\n          print('stable_cholesky failed with diag_noise_power=%d.'%(diag_noise_power))\n        diag_noise_power += 1\n      if diag_noise_power >= 5:\n        print('***** stable_cholesky failed: added diag noise = %e'%(diag_noise))\n  return lmat\n\ndef project_symmetric_to_psd_cone(mmat, is_symmetric=True, epsilon=0):\n  \"\"\" Project symmetric matrix mmat to the PSD cone \"\"\"\n  if is_symmetric:\n    try:\n      eigvals, eigvecs = np.linalg.eigh(mmat)\n    except np.linalg.LinAlgError:\n      print('LinAlgError encountered with np.eigh. Defaulting to eig.')\n      eigvals, eigvecs = np.linalg.eig(mmat)\n      eigvals = np.real(eigvals)\n      eigvecs = np.real(eigvecs)\n  else:\n    eigvals, eigvecs = np.linalg.eig(mmat)\n  clipped_eigvals = np.clip(eigvals, epsilon, np.inf)\n  return (eigvecs * clipped_eigvals).dot(eigvecs.T)\n\ndef solve_lower_triangular(amat, b):\n  \"\"\" Solves amat*x=b when amat is lower triangular \"\"\"\n  return solve_triangular_base(amat, b, lower=True)\n\ndef solve_upper_triangular(amat, b):\n  \"\"\" Solves amat*x=b when amat is upper triangular \"\"\"\n  return solve_triangular_base(amat, b, lower=False)\n\ndef solve_triangular_base(amat, b, lower):\n  \"\"\" Solves amat*x=b when amat is a triangular matrix. \"\"\"\n  if amat.size == 0 and b.shape[0] == 0:\n    return np.zeros((b.shape))\n  else:\n    return solve_triangular(amat, b, lower=lower)\n\ndef sample_mvn(mu, covmat, nsamp):\n  \"\"\" Sample from multivariate normal distribution with mean mu and covariance\n      matrix covmat \"\"\"\n  mu = mu.reshape(-1,)\n  ndim = len(mu)\n  lmat = stable_cholesky(covmat)\n  umat = np.random.normal(size=(ndim, nsamp))\n  return lmat.dot(umat).T + mu\n"
  },
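  {
    "path": "examples/gp_utils_demo.py",
    "content": "\"\"\"\nWorked sketch (not part of the original repo) of textbook GP posterior\nsampling built from the helpers in bo/pp/gp/gp_utils.py. The hyperparameters\n(ls, alpha, sigma) and the 1-d toy data are assumptions; this is not one of\nthe repo's PP model classes.\n\"\"\"\nimport numpy as np\nfrom bo.pp.gp.gp_utils import kern_exp_quad, get_cholesky_decomp, solve_lower_triangular, solve_upper_triangular, sample_mvn\n\nls, alpha, sigma = 0.3, 1.0, 0.05  # assumed kernel/noise hyperparameters\nX = np.random.rand(8, 1)\ny = np.sin(4 * X[:, 0]) + sigma * np.random.randn(8)\nXtest = np.linspace(0, 1, 20).reshape(-1, 1)\n\nk11 = kern_exp_quad(X, X, ls, alpha)\nlmat = get_cholesky_decomp(k11, sigma, 'try_first')  # chol(K + sigma^2 I)\nk21 = kern_exp_quad(Xtest, X, ls, alpha)\n# Posterior mean k21 (K + sigma^2 I)^{-1} y via two triangular solves\nmean = k21.dot(solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y)))\nsmat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, k21.T))\ncov = kern_exp_quad(Xtest, Xtest, ls, alpha) - k21.dot(smat)\nprint(sample_mvn(mean, cov, nsamp=3).shape)  # (3, 20)\n"
  },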
  {
    "path": "bo/pp/pp_core.py",
    "content": "\"\"\"\nBase classes for probabilistic programs.\n\"\"\"\n\nimport pickle\n\nclass DiscPP(object):\n  \"\"\" Parent class for discriminative probabilistic programs \"\"\"\n\n  def __init__(self):\n    \"\"\" Constructor \"\"\"\n    self.sample_list = []\n    if not hasattr(self,'data'):\n      raise NotImplementedError('Implement var data in a child class')\n    #if not hasattr(self,'ndimx'):\n      #raise NotImplementedError('Implement var ndimx in a child class')\n    #if not hasattr(self,'ndataInit'):\n      #raise NotImplementedError('Implement var ndataInit in a child class')\n\n  def infer_post_and_update_samples(self,nsamp):\n    \"\"\" Run an inference algorithm (given self.data), draw samples from the\n        posterior, and store in self.sample_list. \"\"\"\n    raise NotImplementedError('Implement method in a child class')\n\n  def sample_pp_post_pred(self,nsamp,input_list):\n    \"\"\" Sample nsamp times from PP posterior predictive, for each x-input in\n    input_list \"\"\"\n    raise NotImplementedError('Implement method in a child class')\n\n  def sample_pp_pred(self,nsamp,input_list,lv_list=None):\n    \"\"\" Sample nsamp times from PP predictive for parameter lv, for each\n    x-input in input_list. If lv is None, draw it uniformly at random\n    from self.sample_list. \"\"\"\n    raise NotImplementedError('Implement method in a child class')\n\n  def add_new_data(self,newData):\n    \"\"\" Add data (newData) to self.data \"\"\"\n    raise NotImplementedError('Implement method in a child class')\n\n  def get_namespace_to_save(self):\n    \"\"\" Return namespace containing object info (to save to file) \"\"\"\n    raise NotImplementedError('Implement method in a child class')\n\n  def save_namespace_to_file(self,fileStr,printFlag):\n    \"\"\" Saves results from get_namespace_to_save in fileStr \"\"\"\n    ppNamespaceToSave = self.get_namespace_to_save()\n    ff = open(fileStr,'wb')\n    pickle.dump(ppNamespaceToSave,ff)\n    ff.close()\n    if printFlag:\n      print('*Saved DiscPP Namespace in pickle file: ' +fileStr+'\\n-----')\n"
  },
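  {
    "path": "examples/ex_pp_core.py",
    "content": "\"\"\"\nMinimal sketch of a DiscPP child class (illustrative; this trivial constant\nmodel is an assumption used only to show the required interface).\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.pp.pp_core import DiscPP\n\nclass ConstantPP(DiscPP):\n  \"\"\" Toy PP whose posterior predictive is the mean of the observed y \"\"\"\n\n  def __init__(self, data):\n    self.data = data  # must be set before DiscPP.__init__ runs\n    super(ConstantPP, self).__init__()\n\n  def infer_post_and_update_samples(self, nsamp=1):\n    self.sample_list = [self.data.y.mean()] * nsamp\n\n  def sample_pp_post_pred(self, nsamp, input_list):\n    return [np.full((nsamp, 1), self.data.y.mean()) for _ in input_list]\n\nif __name__ == '__main__':\n  data = Namespace(X=np.zeros((3, 1)), y=np.array([[1.], [2.], [3.]]))\n  pp = ConstantPP(data)\n  pp.infer_post_and_update_samples(5)\n  print(pp.sample_pp_post_pred(2, [np.zeros(1)]))\n"
  },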
  {
    "path": "bo/pp/pp_gp_george.py",
    "content": "\"\"\"\nClasses for hierarchical GP models with George PP\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nimport scipy.optimize as spo\nimport george\nimport emcee\nfrom bo.pp.pp_core import DiscPP\n\nclass GeorgeGpPP(DiscPP):\n  \"\"\" Hierarchical GPs implemented with George \"\"\"\n\n  def __init__(self,data=None,modelp=None,printFlag=True):\n    \"\"\" Constructor \"\"\"\n    self.set_data(data)\n    self.set_model_params(modelp)\n    self.ndimx = self.modelp.ndimx\n    self.set_kernel()\n    self.set_model()\n    super(GeorgeGpPP,self).__init__()\n    if printFlag:\n      self.print_str()\n\n  def set_data(self,data):\n    if data is None:\n      pass #TODO: handle case where there's no data\n    self.data = data\n\n  def set_model_params(self,modelp):\n    if modelp is None:\n      modelp = Namespace(ndimx=1, noiseVar=1e-3, kernLs=1.5, kernStr='mat',\n        fitType='mle')\n    self.modelp = modelp\n\n  def set_kernel(self):\n    \"\"\" Set kernel for GP \"\"\"\n    if self.modelp.kernStr=='mat':\n      self.kernel = self.data.y.var() * \\\n        george.kernels.Matern52Kernel(self.modelp.kernLs, ndim=self.ndimx)\n    if self.modelp.kernStr=='rbf': # NOTE: periodically produces errors\n      self.kernel = self.data.y.var() * \\\n        george.kernels.ExpSquaredKernel(self.modelp.kernLs, ndim=self.ndimx)\n\n  def set_model(self):\n    \"\"\" Set GP regression model \"\"\"\n    self.model = self.get_model()\n    self.model.compute(self.data.X)\n    self.fit_hyperparams(printOut=False)\n\n  def get_model(self):\n    \"\"\" Returns GPRegression model \"\"\"\n    return george.GP(kernel=self.kernel,fit_mean=True)\n  \n  def fit_hyperparams(self,printOut=False):\n    if self.modelp.fitType=='mle':\n      spo.minimize(self.neg_log_like, self.model.get_parameter_vector(),\n        jac=True)\n    elif self.modelp.fitType=='bayes':\n      self.nburnin = 200\n      nsamp = 200\n      nwalkers = 36\n      gpdim = len(self.model)\n      self.sampler = emcee.EnsembleSampler(nwalkers, gpdim, self.log_post)\n      p0 = self.model.get_parameter_vector() + 1e-4*np.random.randn(nwalkers,\n        gpdim)\n      print 'Running burn-in.'\n      p0, _, _ = self.sampler.run_mcmc(p0, self.nburnin)\n      print 'Running main chain.'\n      self.sampler.run_mcmc(p0, nsamp)\n    if printOut:\n      print 'Final GP hyperparam (in opt or MCMC chain):'\n      print self.model.get_parameter_dict()\n\n  def infer_post_and_update_samples(self):\n    \"\"\" Update self.sample_list \"\"\"\n    self.sample_list = [None] #TODO: need to not-break ts fn in maker_bayesopt.py\n\n  def sample_pp_post_pred(self,nsamp,input_list):\n    \"\"\" Sample from posterior predictive of PP.\n        Inputs:\n          input_list - list of np arrays size=(-1,)\n        Returns:\n          list (len input_list) of np arrays (size=(nsamp,1)).\"\"\"\n    inputArray = np.array(input_list)\n    if self.modelp.fitType=='mle':\n      inputArray = np.array(input_list)\n      ppredArray = self.model.sample_conditional(self.data.y.flatten(),\n        inputArray, nsamp).T\n    elif self.modelp.fitType=='bayes':\n      ppredArray = np.zeros(shape=[len(input_list),nsamp])\n      for s in range(nsamp):\n        walkidx = np.random.randint(self.sampler.chain.shape[0])\n        sampidx = np.random.randint(self.nburnin, self.sampler.chain.shape[1])\n        hparamSamp = self.sampler.chain[walkidx, sampidx]\n        print 'hparamSamp = ' + str(hparamSamp) # TODO: remove print statement\n        
self.model.set_parameter_vector(hparamSamp)\n        ppredArray[:,s] = self.model.sample_conditional(self.data.y.flatten(),\n          inputArray, 1).flatten()\n    return list(ppredArray) # each element is row in ppredArray matrix\n\n  def sample_pp_pred(self,nsamp,input_list,lv=None):\n    \"\"\" Sample from predictive of PP for parameter lv.\n        Returns: list (len input_list) of np arrays (size (nsamp,1)).\"\"\"\n    if self.modelp.fitType=='bayes':\n      print('*WARNING: fitType=bayes not implemented for sample_pp_pred. \\\n        Reverting to fitType=mle')\n      # TODO: Equivalent algo for fitType=='bayes':\n      #   - draw posterior sample path over all xin in input_list\n      #   - draw pred samples around sample path pt, based on noise model\n    inputArray = np.array(input_list)\n    samplePath = self.model.sample_conditional(self.data.y.flatten(),\n      inputArray).reshape(-1,)\n    return [np.random.normal(s,np.sqrt(self.modelp.noiseVar),nsamp).reshape(-1,)\n      for s in samplePath]\n\n  def neg_log_like(self,hparams):\n    \"\"\" Compute and return the negative log likelihood for model\n        hyperparameters hparams, as well as its gradient. \"\"\"\n    self.model.set_parameter_vector(hparams)\n    g = self.model.grad_log_likelihood(self.data.y.flatten(), quiet=True)\n    return -self.model.log_likelihood(self.data.y.flatten(), quiet=True), -g\n\n  def log_post(self,hparams):\n    \"\"\" Compute and return the log posterior density (up to constant of\n        proportionality) for the model hyperparameters hparams. \"\"\"\n    # Uniform prior between -100 and 100, for each hyperparam\n    if np.any((-100 > hparams[1:]) + (hparams[1:] > 100)):\n      return -np.inf\n    self.model.set_parameter_vector(hparams)\n    return self.model.log_likelihood(self.data.y.flatten(), quiet=True)\n\n  # Utilities\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print '*GeorgeGpPP with modelp='+str(self.modelp)+'.'\n    print '-----'\n"
  },
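  {
    "path": "examples/ex_pp_gp_george.py",
    "content": "\"\"\"\nUsage sketch for GeorgeGpPP (illustrative; assumes the george and emcee\npackages are installed, and the toy data below is an assumption).\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.pp.pp_gp_george import GeorgeGpPP\n\n# Toy 1-d regression data in the Namespace format the PP classes expect\nX = np.random.uniform(size=(10, 1))\ny = np.sin(4 * X).flatten()\ndata = Namespace(X=X, y=y)\n\nmodelp = Namespace(ndimx=1, noiseVar=1e-3, kernLs=1.5, kernStr='mat',\n  fitType='mle')\npp = GeorgeGpPP(data=data, modelp=modelp)\n\n# Posterior-predictive samples at a few test inputs\ntest_inputs = [np.array([.1]), np.array([.5]), np.array([.9])]\nppred = pp.sample_pp_post_pred(nsamp=3, input_list=test_inputs)\nprint(len(ppred), ppred[0].shape)  # 3 inputs, 3 samples each\n"
  },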
  {
    "path": "bo/pp/pp_gp_my_distmat.py",
    "content": "\"\"\"\nClasses for GP models without any PP backend, using a given distance matrix.\n\"\"\"\n\nfrom argparse import Namespace\nimport time\nimport copy\nimport numpy as np\nfrom scipy.spatial.distance import cdist \nfrom bo.pp.pp_core import DiscPP\nfrom bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \\\n  get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \\\n  sample_mvn, squared_euc_distmat, kern_distmat\nfrom bo.util.print_utils import suppress_stdout_stderr\n\n\nclass MyGpDistmatPP(DiscPP):\n  \"\"\" GPs using a kernel specified by a given distance matrix, without any PP\n      backend \"\"\"\n\n  def __init__(self, data=None, modelp=None, printFlag=True):\n    \"\"\" Constructor \"\"\"\n    self.set_model_params(modelp)\n    self.set_data(data)\n    self.set_model()\n    super(MyGpDistmatPP,self).__init__()\n    if printFlag:\n      self.print_str()\n\n  def set_model_params(self, modelp):\n    \"\"\" Set self.modelp \"\"\"\n    if modelp is None:\n      pass #TODO\n    self.modelp = modelp\n\n  def set_data(self, data):\n    \"\"\" Set self.data \"\"\"\n    if data is None:\n      pass #TODO\n    self.data_init = copy.deepcopy(data)\n    self.data = copy.deepcopy(self.data_init)\n\n  def set_model(self):\n    \"\"\" Set GP regression model \"\"\"\n    self.model = self.get_model()\n\n  def get_model(self):\n    \"\"\" Returns model object \"\"\"\n    return None\n\n  def infer_post_and_update_samples(self, print_result=False):\n    \"\"\" Update self.sample_list \"\"\"\n    self.sample_list = [Namespace(ls=self.modelp.kernp.ls,\n                                  alpha=self.modelp.kernp.alpha,\n                                  sigma=self.modelp.kernp.sigma)]\n    if print_result: self.print_inference_result()\n\n  def get_distmat(self, xmat1, xmat2):\n    \"\"\" Get distance matrix \"\"\"\n    #return squared_euc_distmat(xmat1, xmat2, .5)\n    \n    from data import Data\n    self.distmat = Data.generate_distance_matrix\n    #print('distmat')\n    #print(self.distmat(xmat1, xmat2, self.modelp.distance))\n    return self.distmat(xmat1, xmat2, self.modelp.distance)\n\n  def print_inference_result(self):\n    \"\"\" Print results of stan inference \"\"\"\n    print('*ls pt est = '+str(self.sample_list[0].ls)+'.')\n    print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')\n    print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')\n    print('-----')\n\n  def sample_pp_post_pred(self, nsamp, input_list, full_cov=False):\n    \"\"\" Sample from posterior predictive of PP.\n        Inputs:\n          input_list - list of np arrays size=(-1,)\n        Returns:\n          list (len input_list) of np arrays (size=(nsamp,1)).\"\"\"\n    samp = self.sample_list[0]\n    postmu, postcov = self.gp_post(self.data.X, self.data.y, input_list,\n                                   samp.ls, samp.alpha, samp.sigma, full_cov)\n    if full_cov:\n      ppred_list = list(sample_mvn(postmu, postcov, nsamp))\n    else:\n      ppred_list = list(np.random.normal(postmu.reshape(-1,),\n                                         postcov.reshape(-1,),\n                                         size=(nsamp, len(input_list))))\n    return list(np.stack(ppred_list).T), ppred_list\n\n  def sample_pp_pred(self, nsamp, input_list, lv=None):\n    \"\"\" Sample from predictive of PP for parameter lv.\n        Returns: list (len input_list) of np arrays (size (nsamp,1)).\"\"\"\n    if lv is None:\n      lv = self.sample_list[0]\n    postmu, postcov = 
self.gp_post(self.data.X, self.data.y, input_list, lv.ls,\n                                   lv.alpha, lv.sigma)\n    pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times\n    return list(np.stack(pred_list).T), pred_list\n\n  def gp_post(self, x_train_list, y_train_arr, x_pred_list, ls, alpha, sigma,\n              full_cov=True):\n    \"\"\" Compute parameters of GP posterior \"\"\"\n    kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat)\n    k11_nonoise = kernel(x_train_list, x_train_list, ls, alpha)\n    lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')\n    smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat,\n                                  y_train_arr))\n    k21 = kernel(x_pred_list, x_train_list, ls, alpha)\n    mu2 = k21.dot(smat)\n    k22 = kernel(x_pred_list, x_pred_list, ls, alpha)\n    vmat = solve_lower_triangular(lmat, k21.T)\n    k2 = k22 - vmat.T.dot(vmat)\n    if full_cov is False:\n      k2 = np.sqrt(np.diag(k2))\n    return mu2, k2\n\n  # Utilities\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*MyGpDistmatPP with modelp='+str(self.modelp)+'.')\n    print('-----')\n"
  },
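  {
    "path": "examples/ex_pp_gp_my_distmat.py",
    "content": "\"\"\"\nUsage sketch for MyGpDistmatPP (illustrative). The real class pulls its\ndistance function from data.Data.generate_distance_matrix, which needs a NAS\nsearch space; here we override get_distmat with a squared-euclidean distance\nso the sketch runs standalone. The toy data and hyperparameters are\nassumptions.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.pp.pp_gp_my_distmat import MyGpDistmatPP\nfrom bo.pp.gp.gp_utils import squared_euc_distmat\n\nmodelp = Namespace(kernp=Namespace(ls=1., alpha=1., sigma=1e-2),\n  distance='edit_distance')\ndata = Namespace(X=list(np.random.uniform(size=(10, 2))),\n  y=np.random.normal(size=(10, 1)))\n\npp = MyGpDistmatPP(data=data, modelp=modelp)\n# Override the NAS-specific distance with a generic one for this demo\npp.get_distmat = lambda a, b: squared_euc_distmat(np.array(a), np.array(b), .5)\npp.infer_post_and_update_samples(print_result=True)\n\ninput_list = list(np.random.uniform(size=(4, 2)))\nppred, ppred_list = pp.sample_pp_post_pred(5, input_list, full_cov=True)\nprint(len(ppred), ppred_list[0].shape)  # 4 inputs; each sample vector has size (4,)\n"
  },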
  {
    "path": "bo/pp/pp_gp_stan.py",
    "content": "\"\"\"\nClasses for GP models with Stan\n\"\"\"\n\nfrom argparse import Namespace\nimport time\nimport numpy as np\nimport copy\nfrom bo.pp.pp_core import DiscPP\nimport bo.pp.stan.gp_hier2 as gpstan2\nimport bo.pp.stan.gp_hier3 as gpstan3\nimport bo.pp.stan.gp_hier2_matern as gpstan2_matern\nfrom bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \\\n  get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \\\n  sample_mvn\nfrom bo.util.print_utils import suppress_stdout_stderr\n\nclass StanGpPP(DiscPP):\n  \"\"\" Hierarchical GPs implemented with Stan \"\"\"\n\n  def __init__(self, data=None, modelp=None, printFlag=True):\n    \"\"\" Constructor \"\"\"\n    self.set_model_params(modelp)\n    self.set_data(data)\n    self.ndimx = self.modelp.ndimx\n    self.set_model()\n    super(StanGpPP,self).__init__()\n    if printFlag:\n      self.print_str()\n\n  def set_model_params(self,modelp):\n    if modelp is None:\n      modelp = Namespace(ndimx=1, model_str='optfixedsig',\n        gp_mean_transf_str='constant')\n      if modelp.model_str=='optfixedsig':\n        modelp.kernp = Namespace(u1=.1, u2=5., n1=10., n2=10., sigma=1e-5)\n        modelp.infp = Namespace(niter=1000)\n      elif modelp.model_str=='opt' or modelp.model_str=='optmatern32':\n        modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01,\n          n4=.01)\n        modelp.infp = Namespace(niter=1000)\n      elif modelp.model_str=='samp' or modelp.model_str=='sampmatern32':\n        modelp.kernp = Namespace(ig1=1., ig2=5., n1=10., n2=20., n3=.01,\n          n4=.01)\n        modelp.infp = Namespace(niter=1500, nwarmup=500)\n    self.modelp = modelp\n\n  def set_data(self, data):\n    \"\"\" Set self.data \"\"\"\n    if data is None:\n      pass #TODO: handle case where there's no data\n    self.data_init = copy.deepcopy(data)\n    self.data = self.get_transformed_data(self.data_init,\n      self.modelp.gp_mean_transf_str)\n\n  def get_transformed_data(self, data, transf_str='linear'):\n    \"\"\" Transform data, for non-zero-mean GP \"\"\"\n    newdata = Namespace(X=data.X)\n    if transf_str=='linear':\n      mmat,_,_,_ = np.linalg.lstsq(np.concatenate([data.X,\n        np.ones((data.X.shape[0],1))],1), data.y.flatten(), rcond=None)\n      self.gp_mean_vec = lambda x: np.matmul(np.concatenate([x,\n        np.ones((x.shape[0],1))],1), mmat)\n      newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1)\n    if transf_str=='constant':\n      yconstant = data.y.mean()\n      #yconstant = 0. 
\n      self.gp_mean_vec = lambda x: np.array([yconstant for xcomp in x])\n      newdata.y = data.y - self.gp_mean_vec(data.X).reshape(-1,1)\n    return newdata\n\n  def set_model(self):\n    \"\"\" Set GP regression model \"\"\"\n    self.model = self.get_model()\n\n  def get_model(self):\n    \"\"\" Returns GPRegression model \"\"\"\n    if self.modelp.model_str=='optfixedsig':\n      return gpstan3.get_model(print_status=False)\n    elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':\n      return gpstan2.get_model(print_status=False)\n    elif self.modelp.model_str=='optmatern32' or \\\n      self.modelp.model_str=='sampmatern32':\n      return gpstan2_matern.get_model(print_status=False)\n\n  def infer_post_and_update_samples(self, seed=5000012, print_result=False):\n    \"\"\" Update self.sample_list \"\"\"\n    data_dict = self.get_stan_data_dict()\n    with suppress_stdout_stderr():\n      if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \\\n        or self.modelp.model_str=='optmatern32':\n        stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter,\n          seed=seed, as_vector=True, algorithm='LBFGS')\n      elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampmatern32':\n        stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter +\n          self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1,\n          seed=seed, refresh=1000)\n      print('-----')\n    self.sample_list = self.get_sample_list_from_stan_out(stanout)\n    if print_result: self.print_inference_result()\n\n  def get_stan_data_dict(self):\n    \"\"\" Return data dict for stan sampling method \"\"\"\n    if self.modelp.model_str=='optfixedsig':\n      return {'u1':self.modelp.kernp.u1, 'u2':self.modelp.kernp.u2,\n              'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,\n              'sigma':self.modelp.kernp.sigma, 'D':self.ndimx,\n              'N':len(self.data.X), 'x':self.data.X, 'y':self.data.y.flatten()}\n    elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':\n      return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,\n              'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,\n              'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,\n              'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X,\n              'y':self.data.y.flatten()}\n    elif self.modelp.model_str=='optmatern32' or \\\n      self.modelp.model_str=='sampmatern32':\n      # 'covid' selects the covariance function in the Stan code (2 = Matern 3/2)\n      return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,\n              'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,\n              'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,\n              'D':self.ndimx, 'N':len(self.data.X), 'x':self.data.X,\n              'y':self.data.y.flatten(), 'covid':2}\n\n  def get_sample_list_from_stan_out(self, stanout):\n    \"\"\" Convert stan output to sample_list \"\"\"\n    if self.modelp.model_str=='optfixedsig':\n      return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],\n        sigma=self.modelp.kernp.sigma)]\n    elif self.modelp.model_str=='opt' or self.modelp.model_str=='optmatern32':\n      return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],\n        sigma=stanout['sigma'])]\n    elif self.modelp.model_str=='samp' or \\\n      self.modelp.model_str=='sampmatern32':\n      sdict = stanout.extract(['rho','alpha','sigma'])\n      return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],\n        sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])]\n\n  def print_inference_result(self):\n    \"\"\" Print results of stan inference \"\"\"\n    if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \\\n      self.modelp.model_str=='optmatern32':\n      print('*ls pt est = '+str(self.sample_list[0].ls)+'.')\n      print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')\n      print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')\n    elif self.modelp.model_str=='samp' or \\\n      self.modelp.model_str=='sampmatern32':\n      ls_arr = np.array([ns.ls for ns in self.sample_list])\n      alpha_arr = np.array([ns.alpha for ns in self.sample_list])\n      sigma_arr = np.array([ns.sigma for ns in self.sample_list])\n      print('*ls mean = '+str(ls_arr.mean())+'.')\n      print('*ls std = '+str(ls_arr.std())+'.')\n      print('*alpha mean = '+str(alpha_arr.mean())+'.')\n      print('*alpha std = '+str(alpha_arr.std())+'.')\n      print('*sigma mean = '+str(sigma_arr.mean())+'.')\n      print('*sigma std = '+str(sigma_arr.std())+'.')\n    print('-----')\n\n  def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None):\n    \"\"\" Sample from posterior predictive of PP.\n        Inputs:\n          input_list - list of np arrays size=(-1,)\n        Returns:\n          tuple of two lists: per-input samples (len input_list, each of\n          size=(nsamp,)), and per-sample vectors (len nsamp, each of\n          size=(len(input_list),)).\"\"\"\n    if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \\\n      self.modelp.model_str=='optmatern32':\n      nloop = 1\n      sampids = [0]\n    elif self.modelp.model_str=='samp' or \\\n      self.modelp.model_str=='sampmatern32':\n      if nloop is None: nloop=nsamp\n      nsamp = int(nsamp/nloop)\n      sampids = np.random.randint(len(self.sample_list), size=(nloop,))\n    ppred_list = []\n    for i in range(nloop):\n      samp = self.sample_list[sampids[i]]\n      postmu, postcov = self.gp_post(self.data.X, self.data.y,\n        np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov)\n      if full_cov:\n        ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp)))\n      else:\n        ppred_list.extend(list(np.random.normal(postmu.reshape(-1,),\n          postcov.reshape(-1,), size=(nsamp, len(input_list)))))\n    return self.get_reverse_transform(list(np.stack(ppred_list).T), ppred_list,\n      input_list)\n\n  def sample_pp_pred(self, nsamp, input_list, lv=None):\n    \"\"\" Sample from predictive of PP for parameter lv.\n        Returns: tuple of two lists, as in sample_pp_post_pred.\"\"\"\n    x_pred = np.stack(input_list)\n    if lv is None:\n      if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \\\n        or self.modelp.model_str=='optmatern32':\n        lv = self.sample_list[0]\n      elif self.modelp.model_str=='samp' or \\\n        self.modelp.model_str=='sampmatern32':\n        lv = self.sample_list[np.random.randint(len(self.sample_list))]\n    postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls,\n      lv.alpha, lv.sigma)\n    pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times\n    return self.get_reverse_transform(list(np.stack(pred_list).T), pred_list,\n      input_list)\n\n  def get_reverse_transform(self, pp1, pp2, input_list):\n    \"\"\" Apply reverse of data transform to ppred or pred \"\"\"\n    pp1 = [pp1[i] + self.gp_mean_vec(input_list[i].reshape(1,-1)) for i in\n           range(len(input_list))]\n    pp2 = [psamp + self.gp_mean_vec(np.array(input_list)) for psamp in pp2]\n    return pp1, pp2\n\n  def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True):\n    \"\"\" Compute parameters of GP posterior \"\"\"\n    if self.modelp.model_str=='optmatern32' or \\\n      self.modelp.model_str=='sampmatern32':\n      kernel = kern_matern32\n    else:\n      kernel = kern_exp_quad\n    k11_nonoise = kernel(x_train, x_train, ls, alpha)\n    lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')\n    smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train))\n    k21 = kernel(x_pred, x_train, ls, alpha)\n    mu2 = k21.dot(smat)\n    k22 = kernel(x_pred, x_pred, ls, alpha)\n    vmat = solve_lower_triangular(lmat, k21.T)\n    k2 = k22 - vmat.T.dot(vmat)\n    if full_cov is False:\n      k2 = np.sqrt(np.diag(k2))\n    return mu2, k2\n\n  # Utilities\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*StanGpPP with modelp='+str(self.modelp)+'.')\n    print('-----')\n"
  },
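  {
    "path": "examples/ex_pp_gp_stan.py",
    "content": "\"\"\"\nUsage sketch for StanGpPP (illustrative; assumes pystan is installed and the\nStan models under bo/pp/stan/hide_model/ have been compiled once, e.g. via\nbo/pp/stan/compile_stan.py). The toy data is an assumption.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.pp.pp_gp_stan import StanGpPP\n\nX = np.random.uniform(size=(10, 1))\ny = np.sin(4 * X)\ndata = Namespace(X=X, y=y)\n\n# Default modelp uses model_str='optfixedsig' (point estimate with fixed sigma)\npp = StanGpPP(data=data)\npp.infer_post_and_update_samples(print_result=True)\n\ninput_list = list(np.random.uniform(size=(4, 1)))\nppred, ppred_list = pp.sample_pp_post_pred(5, input_list, full_cov=True)\nprint(len(ppred), len(ppred_list))  # 4 per-input lists, 5 sample vectors\n"
  },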
  {
    "path": "bo/pp/pp_gp_stan_distmat.py",
    "content": "\"\"\"\nClasses for GP models with Stan, using a given distance matrix.\n\"\"\"\n\nfrom argparse import Namespace\nimport time\nimport copy\nimport numpy as np\nfrom scipy.spatial.distance import cdist \nfrom bo.pp.pp_core import DiscPP\nimport bo.pp.stan.gp_distmat as gpstan\nimport bo.pp.stan.gp_distmat_fixedsig as gpstan_fixedsig\nfrom bo.pp.gp.gp_utils import kern_exp_quad, kern_matern32, \\\n  get_cholesky_decomp, solve_upper_triangular, solve_lower_triangular, \\\n  sample_mvn, squared_euc_distmat, kern_distmat\nfrom bo.util.print_utils import suppress_stdout_stderr\n\nclass StanGpDistmatPP(DiscPP):\n  \"\"\" Hierarchical GPs using a given distance matrix, implemented with Stan \"\"\"\n\n  def __init__(self, data=None, modelp=None, printFlag=True):\n    \"\"\" Constructor \"\"\"\n    self.set_model_params(modelp)\n    self.set_data(data)\n    self.ndimx = self.modelp.ndimx\n    self.set_model()\n    super(StanGpDistmatPP,self).__init__()\n    if printFlag:\n      self.print_str()\n\n  def set_model_params(self, modelp):\n    \"\"\" Set self.modelp \"\"\"\n    if modelp is None:\n      pass #TODO\n    self.modelp = modelp\n\n  def set_data(self, data):\n    \"\"\" Set self.data \"\"\"\n    if data is None:\n      pass #TODO\n    self.data_init = copy.deepcopy(data)\n    self.data = copy.deepcopy(self.data_init)\n\n  def set_model(self):\n    \"\"\" Set GP regression model \"\"\"\n    self.model = self.get_model()\n\n  def get_model(self):\n    \"\"\" Returns GPRegression model \"\"\"\n    if self.modelp.model_str=='optfixedsig' or \\\n      self.modelp.model_str=='sampfixedsig':\n      return gpstan_fixedsig.get_model(print_status=True)\n    elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':\n      return gpstan.get_model(print_status=True)\n    elif self.modelp.model_str=='fixedparam':\n      return None\n\n  def infer_post_and_update_samples(self, seed=543210, print_result=False):\n    \"\"\" Update self.sample_list \"\"\"\n    data_dict = self.get_stan_data_dict()\n    with suppress_stdout_stderr():\n      if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt':\n        stanout = self.model.optimizing(data_dict, iter=self.modelp.infp.niter,\n          #seed=seed, as_vector=True, algorithm='Newton')\n          seed=seed, as_vector=True, algorithm='LBFGS')\n      elif self.modelp.model_str=='samp' or self.modelp.model_str=='sampfixedsig':\n        stanout = self.model.sampling(data_dict, iter=self.modelp.infp.niter +\n          self.modelp.infp.nwarmup, warmup=self.modelp.infp.nwarmup, chains=1,\n          seed=seed, refresh=1000)\n      elif self.modelp.model_str=='fixedparam':\n        stanout = None\n      print('-----')\n    self.sample_list = self.get_sample_list_from_stan_out(stanout)\n    if print_result: self.print_inference_result()\n\n  def get_stan_data_dict(self):\n    \"\"\" Return data dict for stan sampling method \"\"\"\n    if self.modelp.model_str=='optfixedsig' or \\\n      self.modelp.model_str=='sampfixedsig':\n      return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,\n              'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,\n              'sigma':self.modelp.kernp.sigma, 'D':self.ndimx,\n              'N':len(self.data.X), 'y':self.data.y.flatten(),\n              'distmat':self.get_distmat(self.data.X, self.data.X)}\n    elif self.modelp.model_str=='opt' or self.modelp.model_str=='samp':\n      return {'ig1':self.modelp.kernp.ig1, 'ig2':self.modelp.kernp.ig2,\n              
'n1':self.modelp.kernp.n1, 'n2':self.modelp.kernp.n2,\n              'n3':self.modelp.kernp.n3, 'n4':self.modelp.kernp.n4,\n              'D':self.ndimx, 'N':len(self.data.X), 'y':self.data.y.flatten(),\n              'distmat':self.get_distmat(self.data.X, self.data.X)}\n\n  def get_distmat(self, xmat1, xmat2):\n    \"\"\" Get distance matrix \"\"\"\n    # For now, will compute squared euc distance * .5, on self.data.X\n    return squared_euc_distmat(xmat1, xmat2, .5)\n\n  def get_sample_list_from_stan_out(self, stanout):\n    \"\"\" Convert stan output to sample_list \"\"\"\n    if self.modelp.model_str=='optfixedsig':\n      return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],\n        sigma=self.modelp.kernp.sigma)]\n    elif self.modelp.model_str=='opt':\n      return [Namespace(ls=stanout['rho'], alpha=stanout['alpha'],\n        sigma=stanout['sigma'])]\n    elif self.modelp.model_str=='sampfixedsig':\n      sdict = stanout.extract(['rho','alpha'])\n      return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],\n        sigma=self.modelp.kernp.sigma) for i in range(sdict['rho'].shape[0])]\n    elif self.modelp.model_str=='samp':\n      sdict = stanout.extract(['rho','alpha','sigma'])\n      return [Namespace(ls=sdict['rho'][i], alpha=sdict['alpha'][i],\n        sigma=sdict['sigma'][i]) for i in range(sdict['rho'].shape[0])]\n    elif self.modelp.model_str=='fixedparam':\n      return [Namespace(ls=self.modelp.kernp.ls, alpha=self.modelp.kernp.alpha,\n        sigma=self.modelp.kernp.sigma)]\n\n  def print_inference_result(self):\n    \"\"\" Print results of stan inference \"\"\"\n    if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \\\n      self.modelp.model_str=='fixedparam':\n      print('*ls pt est = '+str(self.sample_list[0].ls)+'.')\n      print('*alpha pt est = '+str(self.sample_list[0].alpha)+'.')\n      print('*sigma pt est = '+str(self.sample_list[0].sigma)+'.')\n    elif self.modelp.model_str=='samp' or \\\n      self.modelp.model_str=='sampfixedsig':\n      ls_arr = np.array([ns.ls for ns in self.sample_list])\n      alpha_arr = np.array([ns.alpha for ns in self.sample_list])\n      sigma_arr = np.array([ns.sigma for ns in self.sample_list])\n      print('*ls mean = '+str(ls_arr.mean())+'.')\n      print('*ls std = '+str(ls_arr.std())+'.')\n      print('*alpha mean = '+str(alpha_arr.mean())+'.')\n      print('*alpha std = '+str(alpha_arr.std())+'.')\n      print('*sigma mean = '+str(sigma_arr.mean())+'.')\n      print('*sigma std = '+str(sigma_arr.std())+'.')\n    print('-----')\n\n  def sample_pp_post_pred(self, nsamp, input_list, full_cov=False, nloop=None):\n    \"\"\" Sample from posterior predictive of PP.\n        Inputs:\n          input_list - list of np arrays size=(-1,)\n        Returns:\n          list (len input_list) of np arrays (size=(nsamp,1)).\"\"\"\n    if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' or \\\n        self.modelp.model_str=='fixedparam':\n      nloop = 1\n      sampids = [0]\n    elif self.modelp.model_str=='samp' or \\\n      self.modelp.model_str=='sampfixedsig':\n      if nloop is None: nloop=nsamp\n      nsamp = int(nsamp/nloop)\n      sampids = np.random.randint(len(self.sample_list), size=(nloop,))\n    ppred_list = []\n    for i in range(nloop):\n      samp = self.sample_list[sampids[i]]\n      postmu, postcov = self.gp_post(self.data.X, self.data.y,\n        np.stack(input_list), samp.ls, samp.alpha, samp.sigma, full_cov)\n      if full_cov:\n        
ppred_list.extend(list(sample_mvn(postmu, postcov, nsamp)))\n      else:\n        ppred_list.extend(list(np.random.normal(postmu.reshape(-1,),\n          postcov.reshape(-1,), size=(nsamp, len(input_list)))))\n    return list(np.stack(ppred_list).T), ppred_list\n\n  def sample_pp_pred(self, nsamp, input_list, lv=None):\n    \"\"\" Sample from predictive of PP for parameter lv.\n        Returns: list (len input_list) of np arrays (size (nsamp,1)).\"\"\"\n    x_pred = np.stack(input_list)\n    if lv is None:\n      if self.modelp.model_str=='optfixedsig' or self.modelp.model_str=='opt' \\\n        or self.modelp.model_str=='fixedparam':\n        lv = self.sample_list[0]\n      elif self.modelp.model_str=='samp' or \\\n        self.modelp.model_str=='sampfixedsig':\n        lv = self.sample_list[np.random.randint(len(self.sample_list))]\n    postmu, postcov = self.gp_post(self.data.X, self.data.y, x_pred, lv.ls,\n      lv.alpha, lv.sigma)\n    pred_list = list(sample_mvn(postmu, postcov, 1)) ###TODO: sample from this mean nsamp times\n    return list(np.stack(pred_list).T), pred_list\n\n  def gp_post(self, x_train, y_train, x_pred, ls, alpha, sigma, full_cov=True):\n    \"\"\" Compute parameters of GP posterior \"\"\"\n    kernel = lambda a, b, c, d: kern_distmat(a, b, c, d, self.get_distmat)\n    k11_nonoise = kernel(x_train, x_train, ls, alpha)\n    lmat = get_cholesky_decomp(k11_nonoise, sigma, 'try_first')\n    smat = solve_upper_triangular(lmat.T, solve_lower_triangular(lmat, y_train))\n    k21 = kernel(x_pred, x_train, ls, alpha)\n    mu2 = k21.dot(smat)\n    k22 = kernel(x_pred, x_pred, ls, alpha)\n    vmat = solve_lower_triangular(lmat, k21.T)\n    k2 = k22 - vmat.T.dot(vmat)\n    if full_cov is False:\n      k2 = np.sqrt(np.diag(k2))\n    return mu2, k2\n\n  # Utilities\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*StanGpDistmatPP with modelp='+str(self.modelp)+'.')\n    print('-----')\n"
  },
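  {
    "path": "examples/ex_pp_gp_stan_distmat.py",
    "content": "\"\"\"\nUsage sketch for StanGpDistmatPP (illustrative). With model_str='fixedparam'\nno Stan model is loaded or sampled: the kernel hyperparameters in kernp are\nused directly. pystan must still be importable, since the module imports its\nStan wrappers at load time. The toy data is an assumption.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom bo.pp.pp_gp_stan_distmat import StanGpDistmatPP\n\nmodelp = Namespace(ndimx=2, model_str='fixedparam',\n  kernp=Namespace(ls=2., alpha=1., sigma=1e-2))\ndata = Namespace(X=np.random.uniform(size=(10, 2)),\n  y=np.random.normal(size=(10, 1)))\n\npp = StanGpDistmatPP(data=data, modelp=modelp)\npp.infer_post_and_update_samples(print_result=True)\n\ninput_list = list(np.random.uniform(size=(4, 2)))\nppred, ppred_list = pp.sample_pp_post_pred(5, input_list, full_cov=True)\nprint(len(ppred), ppred_list[0].shape)\n"
  },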
  {
    "path": "bo/pp/stan/__init__.py",
    "content": "\"\"\"\nCode for defining and compiling models in Stan.\n\"\"\"\n"
  },
  {
    "path": "bo/pp/stan/compile_stan.py",
    "content": "\"\"\"\nScript to compile stan models\n\"\"\"\n\n#import pp_new.stan.gp_hier2 as gpstan\n#import pp_new.stan.gp_hier3 as gpstan\n#import pp_new.stan.gp_hier2_matern as gpstan\nimport pp_new.stan.gp_distmat as gpstan\n#import pp_new.stan.gp_distmat_fixedsig as gpstan\n\n\n# Recompile model and return it\nmodel = gpstan.get_model(recompile=True)\n"
  },
  {
    "path": "bo/pp/stan/gp_distmat.py",
    "content": "\"\"\"\nFunctions to define and compile PPs in Stan, for model:\nhierarchical GP (prior on rho, alpha, sigma) using a given distance matrix.\n\"\"\"\n\nimport time\nimport pickle\nimport pystan\n\ndef get_model(recompile=False, print_status=True):\n  model_file_str = 'bo/pp/stan/hide_model/gp_distmat.pkl'\n\n  if recompile:\n    starttime = time.time()\n    model = pystan.StanModel(model_code=get_model_code())\n    buildtime = time.time()-starttime\n    with open(model_file_str,'wb') as f:\n      pickle.dump(model, f)\n    if print_status:\n      print('*Time taken to compile = '+ str(buildtime) +' seconds.\\n-----')\n      print('*Model saved in file ' + model_file_str + '.\\n-----')\n  else:\n    model = pickle.load(open(model_file_str,'rb'))\n    if print_status:\n      print('*Model loaded from file ' + model_file_str + '.\\n-----')\n  return model\n\n\ndef get_model_code():\n  \"\"\" Parse modelp and return stan model code \"\"\"\n  return \"\"\"\n  data {\n    int<lower=1> N;\n    matrix[N, N] distmat;\n    vector[N] y;\n    real<lower=0> ig1;\n    real<lower=0> ig2;\n    real<lower=0> n1;\n    real<lower=0> n2;\n    real<lower=0> n3;\n    real<lower=0> n4;\n  }\n\n  parameters {\n    real<lower=0> rho;\n    real<lower=0> alpha;\n    real<lower=0.0001> sigma;\n  }\n\n  model {\n    matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho))\n                       + diag_matrix(rep_vector(square(sigma), N));\n    matrix[N, N] L_cov = cholesky_decompose(cov);\n    rho ~ inv_gamma(ig1, ig2);\n    alpha ~ normal(n1, n2);\n    sigma ~ normal(n3, n4);\n    y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n  }\n  \"\"\"\n\nif __name__ == '__main__':\n  get_model()\n"
  },
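  {
    "path": "examples/ex_gp_distmat_stan.py",
    "content": "\"\"\"\nSampling sketch for the gp_distmat Stan model (illustrative; assumes the model\nwas compiled once via get_model(recompile=True)). The toy distance matrix and\nprior hyperparameters below are assumptions.\n\"\"\"\n\nimport numpy as np\nfrom scipy.spatial.distance import cdist\nimport bo.pp.stan.gp_distmat as gpstan\n\nN = 10\nx = np.random.uniform(size=(N, 2))\ndata_dict = {'N': N,\n             'distmat': cdist(x, x, 'sqeuclidean'),\n             'y': np.sin(4 * x[:, 0]),\n             'ig1': 1., 'ig2': 5., 'n1': 10., 'n2': 20., 'n3': .01, 'n4': .01}\n\nmodel = gpstan.get_model()  # loads the pickled StanModel\nstanout = model.sampling(data_dict, iter=2000, warmup=1000, chains=1)\nsamples = stanout.extract(['rho', 'alpha', 'sigma'])\nprint(samples['rho'].mean(), samples['alpha'].mean())\n"
  },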
  {
    "path": "bo/pp/stan/gp_distmat_fixedsig.py",
    "content": "\"\"\"\nFunctions to define and compile PPs in Stan, for model:\nhierarchical GP (prior on rho, alpha) and fixed sigma, using a given\ndistance matrix.\n\"\"\"\n\nimport time\nimport pickle\nimport pystan\n\ndef get_model(recompile=False, print_status=True):\n  model_file_str = 'bo/pp/stan/hide_model/gp_distmat_fixedsig.pkl'\n\n  if recompile:\n    starttime = time.time()\n    model = pystan.StanModel(model_code=get_model_code())\n    buildtime = time.time()-starttime\n    with open(model_file_str,'wb') as f:\n      pickle.dump(model, f)\n    if print_status:\n      print('*Time taken to compile = '+ str(buildtime) +' seconds.\\n-----')\n      print('*Model saved in file ' + model_file_str + '.\\n-----')\n  else:\n    model = pickle.load(open(model_file_str,'rb'))\n    if print_status:\n      print('*Model loaded from file ' + model_file_str + '.\\n-----')\n  return model\n\n\ndef get_model_code():\n  \"\"\" Parse modelp and return stan model code \"\"\"\n  return \"\"\"\n  data {\n    int<lower=1> N;\n    matrix[N, N] distmat;\n    vector[N] y;\n    real<lower=0> ig1;\n    real<lower=0> ig2;\n    real<lower=0> n1;\n    real<lower=0> n2;\n    real<lower=0> sigma;\n  }\n\n  parameters {\n    real<lower=0> rho;\n    real<lower=0> alpha;\n  }\n\n  model {\n    matrix[N, N] cov = square(alpha) * exp(-distmat / square(rho))\n                       + diag_matrix(rep_vector(square(sigma), N));\n    matrix[N, N] L_cov = cholesky_decompose(cov);\n    rho ~ inv_gamma(ig1, ig2);\n    alpha ~ normal(n1, n2);\n    y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n  }\n  \"\"\"\n\nif __name__ == '__main__':\n  get_model()\n"
  },
  {
    "path": "bo/pp/stan/gp_hier2.py",
    "content": "\"\"\"\nFunctions to define and compile PPs in Stan, for model:\nhierarchical GP (prior on rho, alpha, sigma)\n\"\"\"\n\nimport time\nimport pickle\nimport pystan\n\ndef get_model(recompile=False, print_status=True):\n  model_file_str = 'bo/pp/stan/hide_model/gp_hier2.pkl'\n\n  if recompile:\n    starttime = time.time()\n    model = pystan.StanModel(model_code=get_model_code())\n    buildtime = time.time()-starttime\n    with open(model_file_str,'wb') as f:\n      pickle.dump(model, f)\n    if print_status:\n      print('*Time taken to compile = '+ str(buildtime) +' seconds.\\n-----')\n      print('*Model saved in file ' + model_file_str + '.\\n-----')\n  else:\n    model = pickle.load(open(model_file_str,'rb'))\n    if print_status:\n      print('*Model loaded from file ' + model_file_str + '.\\n-----')\n  return model\n\n\ndef get_model_code():\n  \"\"\" Parse modelp and return stan model code \"\"\"\n  return \"\"\"\n  data {\n    int<lower=1> D;\n    int<lower=1> N;\n    vector[D] x[N];\n    vector[N] y;\n    real<lower=0> ig1;\n    real<lower=0> ig2;\n    real<lower=0> n1;\n    real<lower=0> n2;\n    real<lower=0> n3;\n    real<lower=0> n4;\n  }\n\n  parameters {\n    real<lower=0> rho;\n    real<lower=0> alpha;\n    real<lower=0.0001> sigma;\n  }\n\n  model {\n    matrix[N, N] cov =   cov_exp_quad(x, alpha, rho)\n                       + diag_matrix(rep_vector(square(sigma), N));\n    matrix[N, N] L_cov = cholesky_decompose(cov);\n    rho ~ inv_gamma(ig1, ig2);\n    alpha ~ normal(n1, n2);\n    sigma ~ normal(n3, n4);\n    y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n  }\n  \"\"\"\n\nif __name__ == '__main__':\n  get_model()\n"
  },
  {
    "path": "bo/pp/stan/gp_hier2_matern.py",
    "content": "\"\"\"\nFunctions to define and compile PPs in Stan, for model: hierarchical GP (prior\non rho, alpha, sigma), with matern kernel\n\"\"\"\n\nimport time\nimport pickle\nimport pystan\n\ndef get_model(recompile=False, print_status=True):\n  model_file_str = 'bo/pp/stan/hide_model/gp_hier2_matern.pkl'\n\n  if recompile:\n    starttime = time.time()\n    model = pystan.StanModel(model_code=get_model_code())\n    buildtime = time.time()-starttime\n    with open(model_file_str,'wb') as f:\n      pickle.dump(model, f)\n    if print_status:\n      print('*Time taken to compile = '+ str(buildtime) +' seconds.\\n-----')\n      print('*Model saved in file ' + model_file_str + '.\\n-----')\n  else:\n    model = pickle.load(open(model_file_str,'rb'))\n    if print_status:\n      print('*Model loaded from file ' + model_file_str + '.\\n-----')\n  return model\n\n\ndef get_model_code():\n  \"\"\" Parse modelp and return stan model code \"\"\"\n  return \"\"\"\n  functions {\n    matrix distance_matrix_single(int N, vector[] x) {\n      matrix[N, N] distmat;\n      for(i in 1:(N-1)) {\n        for(j in (i+1):N) {\n          distmat[i, j] = distance(x[i], x[j]);\n        }\n      }\n      return distmat;\n    }\n\n    matrix matern_covariance(int N, matrix dist, real ls, real alpha_sq, int COVFN) {\n      matrix[N,N] S;\n      real dist_ls; \n      real sqrt3;\n      real sqrt5;\n      sqrt3=sqrt(3.0);\n      sqrt5=sqrt(5.0);\n      \n      // exponential == Matern nu=1/2 , (p=0; nu=p+1/2)\n      if (COVFN==1) {\n        for(i in 1:(N-1)) {\n          for(j in (i+1):N) {\n            dist_ls = fabs(dist[i,j])/ls;\n            S[i,j] = alpha_sq * exp(- dist_ls ); \n          }\n        }\n      }\n\n      // Matern nu= 3/2 covariance\n      else if (COVFN==2) {\n        for(i in 1:(N-1)) {\n          for(j in (i+1):N) {\n           dist_ls = fabs(dist[i,j])/ls;\n           S[i,j] = alpha_sq * (1 + sqrt3 * dist_ls) * exp(-sqrt3 * dist_ls);\n          }\n        }\n      }\n      \n      // Matern nu=5/2 covariance\n      else if (COVFN==3) { \n        for(i in 1:(N-1)) {\n          for(j in (i+1):N) {\n            dist_ls = fabs(dist[i,j])/ls;\n            S[i,j] = alpha_sq * (1 + sqrt5 *dist_ls + 5* pow(dist_ls,2)/3) * exp(-sqrt5 *dist_ls);\n          }\n        }\n      }\n\n      // Matern as nu->Inf become Gaussian (aka squared exponential cov)\n      else if (COVFN==4) {\n        for(i in 1:(N-1)) {\n          for(j in (i+1):N) {\n            dist_ls = fabs(dist[i,j])/ls;\n            S[i,j] = alpha_sq * exp( -pow(dist_ls,2)/2 ) ;\n          }\n        }\n      } \n\n      // fill upper triangle\n      for(i in 1:(N-1)) {\n        for(j in (i+1):N) {\n          S[j,i] = S[i,j];\n        }\n      }\n\n      // create diagonal: nugget(nonspatial) + spatial variance +  eps ensures positive definiteness\n      for(i in 1:N) {\n        S[i,i] = alpha_sq;            \n      }\n\n      return S;\n    }\n  }\n\n  data {\n    int<lower=1> D;\n    int<lower=1> N;\n    vector[D] x[N];\n    vector[N] y;\n    real<lower=0> ig1;\n    real<lower=0> ig2;\n    real<lower=0> n1;\n    real<lower=0> n2;\n    real<lower=0> n3;\n    real<lower=0> n4;\n    int covid;\n  }\n\n  parameters {\n    real<lower=0> rho;\n    real<lower=0> alpha;\n    real<lower=0.0001> sigma;\n  }\n\n  model {\n    matrix[N, N] distmat = distance_matrix_single(N, x);\n    matrix[N, N] cov = matern_covariance(N, distmat, rho, square(alpha), covid);\n    matrix[N, N] L_cov = cholesky_decompose(cov);\n    rho ~ inv_gamma(ig1, ig2);\n   
 alpha ~ normal(n1, n2);\n    sigma ~ normal(n3, n4);\n    y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n  }\n  \"\"\"\n\nif __name__ == '__main__':\n  get_model()\n"
  },
  {
    "path": "bo/pp/stan/gp_hier3.py",
    "content": "\"\"\"\nFunctions to define and compile PPs in Stan, for model:\nhierarchical GP with uniform prior on rho, normal prior on alpha,\nand fixed sigma\n\"\"\"\n\nimport time\nimport pickle\nimport pystan\n\ndef get_model(recompile=False, print_status=True):\n  model_file_str = 'bo/pp/stan/hide_model/gp_hier3.pkl'\n\n  if recompile:\n    starttime = time.time()\n    model = pystan.StanModel(model_code=get_model_code())\n    buildtime = time.time()-starttime\n    with open(model_file_str,'wb') as f:\n      pickle.dump(model, f)\n    if print_status:\n      print('*Time taken to compile = '+ str(buildtime) +' seconds.\\n-----')\n      print('*Model saved in file ' + model_file_str + '.\\n-----')\n  else:\n    model = pickle.load(open(model_file_str,'rb'))\n    if print_status:\n      print('*Model loaded from file ' + model_file_str + '.\\n-----')\n  return model\n\n\ndef get_model_code():\n  \"\"\" Parse modelp and return stan model code \"\"\"\n  return \"\"\"\n  data {\n    int<lower=1> D;\n    int<lower=1> N;\n    vector[D] x[N];\n    vector[N] y;\n    real<lower=0> u1;\n    real<lower=0> u2;\n    real<lower=0> n1;\n    real<lower=0> n2;\n    real<lower=0> sigma;\n  }\n\n  parameters {\n    real<lower=u1, upper=u2> rho;\n    real<lower=0> alpha;\n  }\n\n  model {\n    matrix[N, N] cov =   cov_exp_quad(x, alpha, rho)\n                       + diag_matrix(rep_vector(square(sigma), N));\n    matrix[N, N] L_cov = cholesky_decompose(cov);\n    rho ~ uniform(u1, u2);\n    alpha ~ normal(n1, n2);\n    y ~ multi_normal_cholesky(rep_vector(0, N), L_cov);\n  }\n  \"\"\"\n\nif __name__ == '__main__':\n  get_model()\n"
  },
  {
    "path": "bo/util/__init__.py",
    "content": "\"\"\"\nMiscellaneous utilities.\n\"\"\"\n"
  },
  {
    "path": "bo/util/datatransform.py",
    "content": "\"\"\"\nClasses for transforming data.\n\"\"\"\n\nfrom argparse import Namespace\nimport numpy as np\nfrom sklearn.preprocessing import StandardScaler\n#import sklearn.preprocessing as sklp \n\nclass DataTransformer(object):\n  \"\"\" Class for transforming data \"\"\"\n\n  def __init__(self, datamat, printflag=True):\n    \"\"\" Constructor\n        Parameters:\n          datamat - numpy array (n x d) of data to be transformed\n    \"\"\"\n    self.datamat = datamat\n    self.set_transformers()\n    if printflag:\n      self.print_str()\n\n  def set_transformers(self):\n    \"\"\" Set transformers using self.datamat \"\"\"\n    self.ss = StandardScaler()\n    self.ss.fit(self.datamat)\n\n  def transform_data(self, datamat=None):\n    \"\"\" Return transformed datamat (default self.datamat) \"\"\"\n    if datamat is None:\n      datamat = self.datamat\n    return self.ss.transform(datamat)\n \n  def inv_transform_data(self, datamat):\n    \"\"\" Return inverse transform of datamat \"\"\"\n    return self.ss.inverse_transform(datamat)\n\n  def print_str(self):\n    \"\"\" Print a description string \"\"\"\n    print('*DataTransformer with self.datamat.shape = ' +\n      str(self.datamat.shape) + '.')\n    print('-----')\n"
  },
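  {
    "path": "examples/ex_datatransform.py",
    "content": "\"\"\"\nUsage sketch for DataTransformer (illustrative; the toy matrix is an\nassumption).\n\"\"\"\n\nimport numpy as np\nfrom bo.util.datatransform import DataTransformer\n\ndatamat = np.array([[1., 100.], [2., 200.], [3., 300.]])\ndt = DataTransformer(datamat)\n\nscaled = dt.transform_data()             # zero mean, unit variance per column\nrestored = dt.inv_transform_data(scaled)\nprint(np.allclose(restored, datamat))    # True\n"
  },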
  {
    "path": "bo/util/print_utils.py",
    "content": "\"\"\"\nUtilities for printing and output\n\"\"\"\n\nimport os\n\nclass suppress_stdout_stderr(object):\n    ''' A context manager for doing a \"deep suppression\" of stdout and stderr in\n    Python, i.e. will suppress all print, even if the print originates in a\n    compiled C/Fortran sub-function.\n       This will not suppress raised exceptions, since exceptions are printed\n    to stderr just before a script exits, and after the context manager has\n    exited (at least, I think that is why it lets exceptions through). '''\n    def __init__(self):\n        # Open a pair of null files\n        self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]\n        # Save the actual stdout (1) and stderr (2) file descriptors.\n        self.save_fds = [os.dup(1), os.dup(2)]\n\n    def __enter__(self):\n        # Assign the null pointers to stdout and stderr.\n        os.dup2(self.null_fds[0], 1)\n        os.dup2(self.null_fds[1], 2)\n\n    def __exit__(self, *_):\n        # Re-assign the real stdout/stderr back to (1) and (2)\n        os.dup2(self.save_fds[0], 1)\n        os.dup2(self.save_fds[1], 2)\n        # Close the null files\n        for fd in self.null_fds + self.save_fds:\n            os.close(fd)\n"
  },
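  {
    "path": "examples/ex_print_utils.py",
    "content": "\"\"\"\nUsage sketch for suppress_stdout_stderr (illustrative). Everything printed\ninside the with-block goes to /dev/null, including output from compiled\nC/Fortran extensions.\n\"\"\"\n\nfrom bo.util.print_utils import suppress_stdout_stderr\n\nprint('visible')\nwith suppress_stdout_stderr():\n    print('hidden')\nprint('visible again')\n"
  },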
  {
    "path": "darts/__init__.py",
    "content": "\n"
  },
  {
    "path": "darts/arch.py",
    "content": "import numpy as np\nimport sys\nimport os\nimport copy\nimport random\n\nsys.path.append(os.path.expanduser('~/darts/cnn'))\nfrom train_class import Train\n\nOPS = ['none',\n       'max_pool_3x3',\n       'avg_pool_3x3',\n       'skip_connect',\n       'sep_conv_3x3',\n       'sep_conv_5x5',\n       'dil_conv_3x3',\n       'dil_conv_5x5'\n       ]\nNUM_VERTICES = 4\nINPUT_1 = 'c_k-2'\nINPUT_2 = 'c_k-1'\n\n\nclass Arch:\n\n    def __init__(self, arch):\n        self.arch = arch\n\n    def serialize(self):\n        return self.arch\n\n    def query(self, epochs=50):\n        trainer = Train()\n        val_losses, test_losses = trainer.main(self.arch, epochs=epochs)\n        val_loss = 100 - np.mean(val_losses)\n        test_loss = 100 - test_losses[-1]        \n        return val_loss, test_loss\n\n    @classmethod\n    def random_arch(cls):\n        # output a uniformly random architecture spec\n        # from the DARTS repository\n        # https://github.com/quark0/darts\n\n        normal = []\n        reduction = []\n        for i in range(NUM_VERTICES):\n            ops = np.random.choice(range(len(OPS)), NUM_VERTICES)\n\n            #input nodes for conv\n            nodes_in_normal = np.random.choice(range(i+2), 2, replace=False)\n            #input nodes for reduce\n            nodes_in_reduce = np.random.choice(range(i+2), 2, replace=False)\n\n            normal.extend([(nodes_in_normal[0], ops[0]), (nodes_in_normal[1], ops[1])])\n            reduction.extend([(nodes_in_reduce[0], ops[2]), (nodes_in_reduce[1], ops[3])])\n\n        return (normal, reduction)\n\n    def get_arch_list(self):\n        # convert tuple to list so that it is mutable\n        arch_list = []\n        for cell in self.arch:\n            arch_list.append([])\n            for pair in cell:\n                arch_list[-1].append([])\n                for num in pair:\n                    arch_list[-1][-1].append(num)\n        return arch_list\n\n    def mutate(self, edits):\n        \"\"\" mutate a single arch \"\"\"\n        # first convert tuple to array so that it is mutable\n        mutation = self.get_arch_list()\n\n        #make mutations\n        for _ in range(edits):\n            cell = np.random.choice(2)\n            pair = np.random.choice(len(OPS))\n            num = np.random.choice(2)\n            if num == 1:\n                mutation[cell][pair][num] = np.random.choice(len(OPS))\n            else:\n                inputs = pair // 2 + 2\n                choice = np.random.choice(inputs)\n                if pair % 2 == 0 and mutation[cell][pair+1][num] != choice:\n                    mutation[cell][pair][num] = choice\n                elif pair % 2 != 0 and mutation[cell][pair-1][num] != choice:\n                    mutation[cell][pair][num] = choice\n                      \n        return mutation\n\n    def get_paths(self):\n        \"\"\" return all paths from input to output \"\"\"\n\n        path_builder = [[[], [], [], []], [[], [], [], []]]\n        paths = [[], []]\n\n        for i, cell in enumerate(self.arch):\n            for j in range(len(OPS)):\n              if cell[j][0] == 0:\n                  path = [INPUT_1, OPS[cell[j][1]]]\n                  path_builder[i][j//2].append(path)\n                  paths[i].append(path)\n              elif cell[j][0] == 1:\n                  path = [INPUT_2, OPS[cell[j][1]]]\n                  path_builder[i][j//2].append(path)\n                  paths[i].append(path)\n              else:\n                  for path in 
path_builder[i][cell[j][0] - 2]:\n                      path = [*path, OPS[cell[j][1]]]\n                      path_builder[i][j//2].append(path)\n                      paths[i].append(path)\n\n        # check if there are paths of length >=5\n        contains_long_path = [False, False]\n        if max([len(path) for path in paths[0]]) >= 5:\n            contains_long_path[0] = True\n        if max([len(path) for path in paths[1]]) >= 5:\n            contains_long_path[1] = True\n\n        return paths, contains_long_path\n\n    def get_path_indices(self, long_paths=True):\n        \"\"\"\n        compute the index of each path\n        There are 4 * (8^0 + ... + 8^4) paths total\n        If long_paths = False, we give a single boolean to all paths of\n        size 4, so there are only 4 * (1 + 8^0 + ... + 8^3) paths\n        \"\"\"\n        paths, contains_long_path = self.get_paths()\n        normal_paths, reduce_paths = paths\n        num_ops = len(OPS)\n        \"\"\"\n        Compute the max number of paths per input per cell.\n        Since there are two cells and two inputs per cell, \n        total paths = 4 * max_paths\n        \"\"\"\n        if not long_paths:\n            max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)])\n        else:\n            max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)])    \n        path_indices = []\n\n        # set the base index based on the cell and the input\n        for i, paths in enumerate((normal_paths, reduce_paths)):\n            for path in paths:\n                index = i * 2 * max_paths\n                if path[0] == INPUT_2:\n                    index += max_paths\n\n                # recursively compute the index of the path\n                for j in range(NUM_VERTICES + 1):\n                    if j == len(path) - 1:\n                        path_indices.append(index)\n                        break\n                    elif j == (NUM_VERTICES - 1) and not long_paths:\n                        path_indices.append(2 * (i + 1) * max_paths - 1)\n                        break\n                    else:\n                        index += num_ops ** j * (OPS.index(path[j + 1]) + 1)\n\n        return (tuple(path_indices), contains_long_path)\n\n    def encode_paths(self, long_paths=True):\n        # output one-hot encoding of paths\n        path_indices, _ = self.get_path_indices(long_paths=long_paths)\n        num_ops = len(OPS)\n\n        if not long_paths:\n            max_paths = 1 + sum([num_ops ** i for i in range(NUM_VERTICES)])\n        else:\n            max_paths = sum([num_ops ** i for i in range(NUM_VERTICES + 1)])    \n\n        path_encoding = np.zeros(4 * max_paths)\n        for index in path_indices:\n            path_encoding[index] = 1\n        return path_encoding\n\n    def path_distance(self, other):\n        # compute the distance between two architectures\n        # by comparing their path encodings\n        return np.sum(np.array(self.encode_paths() != np.array(other.encode_paths())))\n\n\n\n\n\n"
  },
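  {
    "path": "examples/ex_darts_arch.py",
    "content": "\"\"\"\nUsage sketch for darts/arch.py (illustrative; importing darts.arch assumes the\nDARTS repo is available at ~/darts/cnn, since the module imports its trainer at\nload time).\n\"\"\"\n\nfrom darts.arch import Arch\n\n# Sample a random (normal, reduction) cell pair and inspect its path encoding\narch = Arch(Arch.random_arch())\npaths, contains_long_path = arch.get_paths()\nencoding = arch.encode_paths()\nprint(len(encoding))  # 4 * (8^0 + ... + 8^4) = 18724 entries\n\n# Hamming-style distance between the one-hot path encodings of two archs\nother = Arch(Arch.random_arch())\nprint(arch.path_distance(other))\n"
  },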
  {
    "path": "data.py",
    "content": "import numpy as np\nimport pickle\nimport sys\nimport os\n\nif 'search_space' not in os.environ or os.environ['search_space'] == 'nasbench':\n    from nasbench import api\n    from nas_bench.cell import Cell\n\nelif os.environ['search_space'] == 'darts':\n    from darts.arch import Arch\n\nelif os.environ['search_space'][:12] == 'nasbench_201':\n    from nas_201_api import NASBench201API as API\n    from nas_bench_201.cell import Cell\n\nelse:\n    print('Invalid search space environ in data.py')\n    sys.exit()\n\n\nclass Data:\n\n    def __init__(self, \n                 search_space, \n                 dataset='cifar10', \n                 nasbench_folder='./', \n                 loaded_nasbench=None):\n        self.search_space = search_space\n        self.dataset = dataset\n\n        if loaded_nasbench:\n            self.nasbench = loaded_nasbench\n        elif search_space == 'nasbench':\n                self.nasbench = api.NASBench(nasbench_folder + 'nasbench_only108.tfrecord')\n        elif search_space == 'nasbench_201':\n            self.nasbench = API(os.path.expanduser('~/nas-bench-201/NAS-Bench-201-v1_0-e61699.pth'))\n        elif search_space != 'darts':\n            print(search_space, 'is not a valid search space')\n            sys.exit()\n\n    def get_type(self):\n        return self.search_space\n\n    def query_arch(self, \n                   arch=None, \n                   train=True, \n                   encoding_type='path', \n                   cutoff=-1,\n                   deterministic=True, \n                   epochs=0):\n\n        arch_dict = {}\n        arch_dict['epochs'] = epochs\n        if self.search_space in ['nasbench', 'nasbench_201']:\n            if arch is None:\n                arch = Cell.random_cell(self.nasbench)\n\n            arch_dict['spec'] = arch\n\n            if encoding_type == 'adj':\n                encoding = Cell(**arch).encode_standard()\n            elif encoding_type == 'path':\n                encoding = Cell(**arch).encode_paths()\n            elif encoding_type == 'trunc_path':\n                encoding = Cell(**arch).encode_paths()[:cutoff]\n            else:\n                print('invalid encoding type')\n\n            arch_dict['encoding'] = encoding\n\n            if train:\n                arch_dict['val_loss'] = Cell(**arch).get_val_loss(self.nasbench, \n                                                                    deterministic=deterministic,\n                                                                    dataset=self.dataset)\n                arch_dict['test_loss'] = Cell(**arch).get_test_loss(self.nasbench,\n                                                                    dataset=self.dataset)\n                arch_dict['num_params'] = Cell(**arch).get_num_params(self.nasbench)\n                arch_dict['val_per_param'] = (arch_dict['val_loss'] - 4.8) * (arch_dict['num_params'] ** 0.5) / 100\n\n        else:\n            if arch is None:\n                arch = Arch.random_arch()\n\n            arch_dict['spec'] = arch\n\n            if encoding_type == 'path':\n                encoding = Arch(arch).encode_paths()\n            elif encoding_type == 'trunc_path':\n                encoding = Arch(arch).encode_paths()[:cutoff]\n            else:\n                encoding = arch\n\n            arch_dict['encoding'] = encoding\n\n            if train:\n                if epochs == 0:\n                    epochs = 50\n                arch_dict['val_loss'], arch_dict['test_loss'] = 
Arch(arch).query(epochs=epochs)\n        \n        return arch_dict           \n\n    def mutate_arch(self, \n                    arch, \n                    mutation_rate=1.0):\n        if self.search_space in ['nasbench', 'nasbench_201']:\n            return Cell(**arch).mutate(self.nasbench, \n                                       mutation_rate=mutation_rate)\n        else:\n            return Arch(arch).mutate(int(mutation_rate))\n\n    def get_hash(self, arch):\n        # return the path indices of the architecture, used as a hash\n        if self.search_space == 'nasbench':\n            return Cell(**arch).get_path_indices()\n        elif self.search_space == 'darts':\n            return Arch(arch).get_path_indices()[0]\n        else:\n            return Cell(**arch).get_string()\n\n    def generate_random_dataset(self,\n                                num=10, \n                                train=True,\n                                encoding_type='path', \n                                cutoff=-1,\n                                random='standard',\n                                allow_isomorphisms=False, \n                                deterministic_loss=True,\n                                patience_factor=5):\n        \"\"\"\n        create a dataset of randomly sampled architectues\n        test for isomorphisms using a hash map of path indices\n        use patience_factor to avoid infinite loops\n        \"\"\"\n        data = []\n        dic = {}\n        tries_left = num * patience_factor\n        while len(data) < num:\n            tries_left -= 1\n            if tries_left <= 0:\n                break\n            arch_dict = self.query_arch(train=train,\n                                        encoding_type=encoding_type,\n                                        cutoff=cutoff,\n                                        deterministic=deterministic_loss)\n\n            h = self.get_hash(arch_dict['spec'])\n            if allow_isomorphisms or h not in dic:\n                dic[h] = 1\n                data.append(arch_dict)\n\n        return data\n\n    def get_candidates(self, \n                       data, \n                       num=100,\n                       acq_opt_type='mutation',\n                       encoding_type='path',\n                       cutoff=-1,\n                       loss='val_loss',\n                       patience_factor=5, \n                       deterministic_loss=True,\n                       num_arches_to_mutate=1,\n                       max_mutation_rate=1,\n                       allow_isomorphisms=False):\n        \"\"\"\n        Creates a set of candidate architectures with mutated and/or random architectures\n        \"\"\"\n\n        candidates = []\n        # set up hash map\n        dic = {}\n        for d in data:\n            arch = d['spec']\n            h = self.get_hash(arch)\n            dic[h] = 1\n\n        if acq_opt_type in ['mutation', 'mutation_random']:\n            # mutate architectures with the lowest loss\n            best_arches = [arch['spec'] for arch in sorted(data, key=lambda i:i[loss])[:num_arches_to_mutate * patience_factor]]\n\n            # stop when candidates is size num\n            # use patience_factor instead of a while loop to avoid long or infinite runtime\n            for arch in best_arches:\n                if len(candidates) >= num:\n                    break\n                for i in range(num // num_arches_to_mutate // max_mutation_rate):\n                    for rate in range(1, 
max_mutation_rate + 1):\n                        mutated = self.mutate_arch(arch, mutation_rate=rate)\n                        arch_dict = self.query_arch(mutated,\n                                                    train=False,\n                                                    encoding_type=encoding_type,\n                                                    cutoff=cutoff)\n                        h = self.get_hash(mutated)\n\n                        if allow_isomorphisms or h not in dic:\n                            dic[h] = 1\n                            candidates.append(arch_dict)\n\n        if acq_opt_type in ['random', 'mutation_random']:\n            # add randomly sampled architectures to the set of candidates\n            for _ in range(num * patience_factor):\n                if len(candidates) >= 2 * num:\n                    break\n\n                arch_dict = self.query_arch(train=False, \n                                            encoding_type=encoding_type,\n                                            cutoff=cutoff)\n                h = self.get_hash(arch_dict['spec'])\n\n                if allow_isomorphisms or h not in dic:\n                    dic[h] = 1\n                    candidates.append(arch_dict)\n\n        return candidates\n\n    def remove_duplicates(self, candidates, data):\n        # input: two sets of architectures: candidates and data\n        # output: candidates with arches from data removed\n\n        dic = {}\n        for d in data:\n            dic[self.get_hash(d['spec'])] = 1\n        unduplicated = []\n        for candidate in candidates:\n            if self.get_hash(candidate['spec']) not in dic:\n                dic[self.get_hash(candidate['spec'])] = 1\n                unduplicated.append(candidate)\n        return unduplicated\n\n    def encode_data(self, dicts):\n        \"\"\"\n        method used by metann_runner.py (for Arch)\n        input: list of arch dictionary objects\n        output: list of (arch, encoding, val_loss_avg, None) tuples\n        \"\"\"\n        data = []\n\n        for dic in dicts:\n            arch = dic['spec']\n            encoding = Arch(arch).encode_paths()\n            data.append((arch, encoding, dic['val_loss_avg'], None))\n\n        return data\n\n    def get_arch_list(self,\n                      aux_file_path, \n                      iteridx=0, \n                      num_top_arches=5,\n                      max_edits=20, \n                      num_repeats=5,\n                      verbose=1):\n        # Method used for gp_bayesopt\n\n        if self.search_space == 'darts':\n            print('get_arch_list only supported for nasbench and nasbench_201')\n            sys.exit()\n\n        # load the list of architectures chosen by bayesopt so far\n        base_arch_list = pickle.load(open(aux_file_path, 'rb'))\n        top_arches = [archtuple[0] for archtuple in base_arch_list[:num_top_arches]]\n        if verbose:\n            top_5_loss = [archtuple[1][0] for archtuple in base_arch_list[:min(5, len(base_arch_list))]]\n            print('top 5 val losses {}'.format(top_5_loss))\n\n        # perturb the best k architectures\n        dic = {}\n        for archtuple in base_arch_list:\n            path_indices = Cell(**archtuple[0]).get_path_indices()\n            dic[path_indices] = 1\n\n        new_arch_list = []\n        for arch in top_arches:\n            for edits in range(1, max_edits):\n                for _ in range(num_repeats):\n                    perturbation = 
Cell(**arch).perturb(self.nasbench, edits)\n                    path_indices = Cell(**perturbation).get_path_indices()\n                    if path_indices not in dic:\n                        dic[path_indices] = 1\n                        new_arch_list.append(perturbation)\n\n        # make sure new_arch_list is not empty\n        while len(new_arch_list) == 0:\n            for _ in range(100):\n                arch = Cell.random_cell(self.nasbench)\n                path_indices = Cell(**arch).get_path_indices()\n                if path_indices not in dic:\n                    dic[path_indices] = 1\n                    new_arch_list.append(arch)\n\n        return new_arch_list\n\n    @classmethod\n    def generate_distance_matrix(cls, arches_1, arches_2, distance):\n        # Method used for gp_bayesopt for nasbench\n        matrix = np.zeros([len(arches_1), len(arches_2)])\n        for i, arch_1 in enumerate(arches_1):\n            for j, arch_2 in enumerate(arches_2):\n                if distance == 'edit_distance':\n                    matrix[i][j] = Cell(**arch_1).edit_distance(Cell(**arch_2))\n                elif distance == 'path_distance':\n                    matrix[i][j] = Cell(**arch_1).path_distance(Cell(**arch_2))\n                elif distance == 'trunc_path_distance':\n                    matrix[i][j] = Cell(**arch_1).trunc_path_distance(Cell(**arch_2))\n                elif distance == 'nasbot_distance':\n                    matrix[i][j] = Cell(**arch_1).nasbot_distance(Cell(**arch_2))\n                else:\n                    print('{} is an invalid distance'.format(distance))\n                    sys.exit()\n        return matrix\n"
  },
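  {
    "path": "examples/data_usage_example.py",
    "content": "# Illustrative usage sketch for data.py; this file is not part of the\n# original repo, and the parameters below are assumptions. It assumes\n# nasbench_only108.tfrecord has been downloaded to the repo root.\nimport os\n\n# data.py selects its imports from this environment variable, so it must be\n# set before 'from data import Data'\nos.environ['search_space'] = 'nasbench'\n\nfrom data import Data\n\nsearch_space = Data('nasbench')\n\n# query a random architecture; the returned dict holds 'spec', 'encoding',\n# 'val_loss', 'test_loss', and 'num_params' (see query_arch in data.py)\narch_dict = search_space.query_arch(encoding_type='path')\nprint('encoding length:', len(arch_dict['encoding']))\nprint('val loss:', arch_dict['val_loss'])\n\n# sample a small dataset with isomorphic duplicates filtered out via get_hash\ndata = search_space.generate_random_dataset(num=5, encoding_type='adj')\nprint('sampled', len(data), 'architectures')\n"
  },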
  {
    "path": "meta_neural_net.py",
    "content": "import argparse\nimport itertools\nimport os\nimport random\nimport sys\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom tensorflow import keras\nimport tensorflow as tf\nfrom tensorflow.keras import backend as K\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.optimizers import Adam\n\ndef mle_loss(y_true, y_pred):\n    # Minimum likelihood estimate loss function\n    mean = tf.slice(y_pred, [0, 0], [-1, 1])\n    var = tf.slice(y_pred, [0, 1], [-1, 1])\n    return 0.5 * tf.log(2*np.pi*var) + tf.square(y_true - mean) / (2*var)\n\n\ndef mape_loss(y_true, y_pred):\n    # Minimum absolute percentage error loss function\n    lower_bound = 4.5\n    fraction = tf.math.divide(tf.subtract(y_pred, lower_bound), \\\n        tf.subtract(y_true, lower_bound))\n    return tf.abs(tf.subtract(fraction, 1))\n\n\nclass MetaNeuralnet:\n\n    def get_dense_model(self, \n                        input_dims, \n                        num_layers,\n                        layer_width,\n                        loss,\n                        regularization):\n        input_layer = keras.layers.Input(input_dims)\n        model = keras.models.Sequential()\n\n        for _ in range(num_layers):\n            model.add(keras.layers.Dense(layer_width, activation='relu'))\n\n        model = model(input_layer)\n        if loss == 'mle':\n            mean = keras.layers.Dense(1)(model)\n            var = keras.layers.Dense(1)(model)\n            var = keras.layers.Activation(tf.math.softplus)(var)\n            output = keras.layers.concatenate([mean, var])\n        else:\n            if regularization == 0:\n                output = keras.layers.Dense(1)(model)\n            else:\n                reg = keras.regularizers.l1(regularization)\n                output = keras.layers.Dense(1, kernel_regularizer=reg)(model)\n\n        dense_net = keras.models.Model(inputs=input_layer, outputs=output)\n        return dense_net\n\n    def fit(self, xtrain, ytrain, \n            num_layers=10,\n            layer_width=20,\n            loss='mae',\n            epochs=200, \n            batch_size=32, \n            lr=.01, \n            verbose=0, \n            regularization=0,\n            **kwargs):\n\n        if loss == 'mle':\n            loss_fn = mle_loss\n        elif loss == 'mape':\n            loss_fn = mape_loss\n        else:\n            loss_fn = 'mae'\n\n        self.model = self.get_dense_model((xtrain.shape[1],), \n                                            loss=loss_fn,\n                                            num_layers=num_layers,\n                                            layer_width=layer_width,\n                                            regularization=regularization)\n        optimizer = keras.optimizers.Adam(lr=lr, beta_1=.9, beta_2=.99)\n\n        self.model.compile(optimizer=optimizer, loss=loss_fn)\n        #print(self.model.summary())\n        self.model.fit(xtrain, ytrain, \n                        batch_size=batch_size, \n                        epochs=epochs, \n                        verbose=verbose)\n\n        train_pred = np.squeeze(self.model.predict(xtrain))\n        train_error = np.mean(abs(train_pred-ytrain))\n        return train_error\n\n    def predict(self, xtest):\n        return self.model.predict(xtest)\n"
  },
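  {
    "path": "examples/meta_neural_net_example.py",
    "content": "# Minimal sketch of MetaNeuralnet.fit/predict on synthetic data; this file\n# is not part of the original repo, and the binary 'encodings' and targets\n# below are made up purely to show the shapes involved.\nimport numpy as np\n\nfrom meta_neural_net import MetaNeuralnet\n\nrng = np.random.RandomState(0)\n\n# stand-ins for path encodings: binary vectors with a synthetic target\nxtrain = rng.randint(0, 2, size=(100, 40)).astype(np.float32)\nytrain = 5 + 0.1 * xtrain.sum(axis=1)\n\nmeta_neuralnet = MetaNeuralnet()\ntrain_error = meta_neuralnet.fit(xtrain, ytrain,\n                                 num_layers=10,\n                                 layer_width=20,\n                                 loss='mae',\n                                 epochs=50,\n                                 lr=.01)\nprint('train error:', train_error)\n\nxtest = rng.randint(0, 2, size=(10, 40)).astype(np.float32)\nprint('predictions:', np.squeeze(meta_neuralnet.predict(xtest)))\n"
  },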
  {
    "path": "meta_neuralnet.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Train a Meta Neural Network on NASBench\\n\",\n    \"## Predict the accuracy of neural networks to within one percent!\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"%load_ext autoreload\\n\",\n    \"%autoreload 2\\n\",\n    \"%matplotlib inline\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import numpy as np\\n\",\n    \"from matplotlib import pyplot as plt\\n\",\n    \"from nasbench import api\\n\",\n    \"\\n\",\n    \"from data import Data\\n\",\n    \"from meta_neural_net import MetaNeuralnet\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# define a function to plot the meta neural networks\\n\",\n    \"\\n\",\n    \"def plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, max_disp=500, title=None):\\n\",\n    \"    \\n\",\n    \"    plt.scatter(ytrain[:max_disp], train_pred[:max_disp], label='training data', alpha=0.7, s=64)\\n\",\n    \"    plt.scatter(ytest[:max_disp], test_pred[:max_disp], label = 'test data', alpha=0.7, marker='^')\\n\",\n    \"\\n\",\n    \"    # axis limits\\n\",\n    \"    plt.xlim((5, 15))\\n\",\n    \"    plt.ylim((5, 15))\\n\",\n    \"    ax_lim = np.array([np.min([plt.xlim()[0], plt.ylim()[0]]),\\n\",\n    \"                    np.max([plt.xlim()[1], plt.ylim()[1]])])\\n\",\n    \"    plt.xlim(ax_lim)\\n\",\n    \"    plt.ylim(ax_lim)\\n\",\n    \"    \\n\",\n    \"    # 45-degree line\\n\",\n    \"    plt.plot(ax_lim, ax_lim, 'k:') \\n\",\n    \"     \\n\",\n    \"    plt.gca().set_aspect('equal', adjustable='box')\\n\",\n    \"    plt.title(title)\\n\",\n    \"    plt.legend(loc='best')\\n\",\n    \"    plt.xlabel('true percent error')\\n\",\n    \"    plt.ylabel('predicted percent error')\\n\",\n    \"    plt.show()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"# load the NASBench dataset\\n\",\n    \"# takes about 1 minute to load the nasbench dataset\\n\",\n    \"search_space = Data('nasbench')\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# method which runs a meta neural network experiment\\n\",\n    \"def meta_neuralnet_experiment(params, \\n\",\n    \"                              ns=[100, 500], \\n\",\n    \"                              num_ensemble=3, \\n\",\n    \"                              test_size=500,\\n\",\n    \"                              cutoff=40,\\n\",\n    \"                              plot=True):\\n\",\n    \"    \\n\",\n    \"    for n in ns:\\n\",\n    \"        for encoding_type in ['adj', 'path']:\\n\",\n    \"\\n\",\n    \"            train_data = search_space.generate_random_dataset(num=n, \\n\",\n    \"                                                encoding_type=encoding_type,\\n\",\n    \"                                                cutoff=cutoff)\\n\",\n    \"            \\n\",\n    \"            test_data = search_space.generate_random_dataset(num=test_size, \\n\",\n    \"                                                encoding_type=encoding_type,\\n\",\n    \"   
                                             cutoff=cutoff)\\n\",\n    \"            \\n\",\n    \"            print(len(test_data))\\n\",\n    \"            test_data = search_space.remove_duplicates(test_data, train_data)\\n\",\n    \"            print(len(test_data))\\n\",\n    \"            \\n\",\n    \"            xtrain = np.array([d['encoding'] for d in train_data])\\n\",\n    \"            ytrain = np.array([d['val_loss'] for d in train_data])\\n\",\n    \"\\n\",\n    \"            xtest = np.array([d['encoding'] for d in test_data])\\n\",\n    \"            ytest = np.array([d['val_loss'] for d in test_data])\\n\",\n    \"\\n\",\n    \"            train_errors = []\\n\",\n    \"            test_errors = []\\n\",\n    \"            meta_neuralnet = MetaNeuralnet()\\n\",\n    \"            for _ in range(num_ensemble):            \\n\",\n    \"                meta_neuralnet.fit(xtrain, ytrain, **params)\\n\",\n    \"                train_pred = np.squeeze(meta_neuralnet.predict(xtrain))\\n\",\n    \"                train_error = np.mean(abs(train_pred-ytrain))\\n\",\n    \"                train_errors.append(train_error)\\n\",\n    \"                test_pred = np.squeeze(meta_neuralnet.predict(xtest))        \\n\",\n    \"                test_error = np.mean(abs(test_pred-ytest))\\n\",\n    \"                test_errors.append(test_error)\\n\",\n    \"\\n\",\n    \"            train_error = np.round(np.mean(train_errors, axis=0), 3)\\n\",\n    \"            test_error = np.round(np.mean(test_errors, axis=0), 3)\\n\",\n    \"            print('Meta neuralnet training size: {}, encoding type: {}'.format(n, encoding_type))\\n\",\n    \"            print('Train error: {}, test error: {}'.format(train_error, test_error))\\n\",\n    \"\\n\",\n    \"            if plot:\\n\",\n    \"                if encoding_type == 'path':\\n\",\n    \"                    title = 'Path encoding, training set size {}'.format(n)\\n\",\n    \"                else:\\n\",\n    \"                    title = 'Adjacency list encoding, training set size {}'.format(n)            \\n\",\n    \"\\n\",\n    \"                plot_meta_neuralnet(ytrain, train_pred, ytest, test_pred, title=title)\\n\",\n    \"                plt.show()          \\n\",\n    \"            print('correlation', np.corrcoef(ytest, test_pred)[1,0])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"meta_neuralnet_params = {'loss':'mae', 'num_layers':10, 'layer_width':20, 'epochs':200, \\\\\\n\",\n    \"                         'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {\n    \"scrolled\": false\n   },\n   \"outputs\": [],\n   \"source\": [\n    \"meta_neuralnet_experiment(meta_neuralnet_params)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.7.7\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 
2\n}\n"
  },
  {
    "path": "metann_runner.py",
    "content": "import argparse\nimport time\nimport logging\nimport sys\nimport os\nimport pickle\nimport numpy as np\n\nfrom acquisition_functions import acq_fn\nfrom data import Data\nfrom meta_neural_net import MetaNeuralnet\n\n\n\"\"\"\nmeta neural net runner is used in run_experiments_parallel\n\n - loads data by opening k*i pickle files from previous iterations\n - trains a meta neural network and predicts accuracy of all candidates\n - outputs k pickle files of the architecture to be trained next\n\"\"\"\n\ndef run_meta_neuralnet(search_space, dicts,\n                        k=10,\n                        verbose=1, \n                        num_ensemble=5, \n                        epochs=10000,\n                        lr=0.00001,\n                        loss='scaled',\n                        explore_type='its',\n                        explore_factor=0.5):\n\n    # data: list of arch dictionary objects\n    # trains a meta neural network\n    # returns list of k arch dictionary objects - the k best predicted\n\n    results = []\n    meta_neuralnet = MetaNeuralnet()\n    data = search_space.encode_data(dicts)\n    xtrain = np.array([d[1] for d in data])\n    ytrain = np.array([d[2] for d in data])\n\n    candidates = search_space.get_candidates(data, \n                                            acq_opt_type='mutation_random',\n                                            encode_paths=True, \n                                            allow_isomorphisms=True,\n                                            deterministic_loss=None)\n\n    xcandidates = np.array([c[1] for c in candidates])\n    candidates_specs = [c[0] for c in candidates]\n    predictions = []\n\n    # train an ensemble of neural networks\n    train_error = 0\n    for _ in range(num_ensemble):\n        meta_neuralnet = MetaNeuralnet()\n        train_error += meta_neuralnet.fit(xtrain, ytrain,\n                                            loss=loss,\n                                            epochs=epochs,\n                                            lr=lr)\n        predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates)))\n    train_error /= num_ensemble\n    if verbose:\n        print('Meta neural net train error: {}'.format(train_error))\n\n    sorted_indices = acq_fn(predictions, explore_type)\n\n    top_k_candidates = [candidates_specs[i] for i in sorted_indices[:k]]\n    candidates_dict = []\n    for candidate in top_k_candidates:\n        d = {}\n        d['spec'] = candidate\n        candidates_dict.append(d)\n\n    return candidates_dict\n\n\ndef run(args):\n\n    save_dir = '{}/'.format(args.experiment_name)\n    if not os.path.exists(save_dir):\n        os.mkdir(save_dir)\n\n    query = args.query\n    k = args.k\n    trained_prefix = args.trained_filename\n    untrained_prefix = args.untrained_filename\n    threshold = args.threshold\n\n    search_space = Data('darts')\n\n    # if it's the first iteration, choose k arches at random to train\n    if query == 0:\n        print('about to generate {} random'.format(k))\n        data = search_space.generate_random_dataset(num=k, train=False)\n        arches = [d['spec'] for d in data]\n\n        next_arches = []\n        for arch in arches:\n            d = {}\n            d['spec'] = arch\n            next_arches.append(d)\n\n    else:\n        # get the data from prior iterations from pickle files\n        data = []\n        for i in range(query):\n\n            filepath = '{}{}_{}.pkl'.format(save_dir, trained_prefix, i)\n            with 
open(filepath, 'rb') as f:\n                arch = pickle.load(f)\n            data.append(arch)\n\n        print('Iteration {}'.format(query))\n        print('Data from last round')\n        print(data)\n\n        # run the meta neural net to output the next arches\n        next_arches = run_meta_neuralnet(search_space, data, k=k)\n\n    print('next batch')\n    print(next_arches)\n\n    # output the new arches to pickle files\n    for i in range(k):\n        index = query + i\n        filepath = '{}{}_{}.pkl'.format(save_dir, untrained_prefix, index)\n        next_arches[i]['index'] = index\n        next_arches[i]['filepath'] = filepath\n        with open(filepath, 'wb') as f:\n            pickle.dump(next_arches[i], f)\n\n\ndef main(args):\n\n    #set up save dir\n    save_dir = './'\n\n    #set up logging\n    log_format = '%(asctime)s %(message)s'\n    logging.basicConfig(stream=sys.stdout, level=logging.INFO,\n        format=log_format, datefmt='%m/%d %I:%M:%S %p')\n    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))\n    fh.setFormatter(logging.Formatter(log_format))\n    logging.getLogger().addHandler(fh)\n    logging.info(args)\n\n    run(args)\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description='Args for meta neural net')\n    parser.add_argument('--experiment_name', type=str, default='darts_test', help='Folder for input/output files')\n    parser.add_argument('--params', type=str, default='test', help='Which set of params to use')\n    parser.add_argument('--query', type=int, default=0, help='Which query is Neural BayesOpt on')\n    parser.add_argument('--trained_filename', type=str, default='trained_spec', help='name of input files')\n    parser.add_argument('--untrained_filename', type=str, default='untrained_spec', help='name of output files')\n    parser.add_argument('--k', type=int, default=10, help='number of arches to train per iteration')\n    parser.add_argument('--threshold', type=int, default=20, help='throw out arches with val loss above threshold')\n\n    args = parser.parse_args()\n    main(args)"
  },
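  {
    "path": "examples/pickle_handoff_example.py",
    "content": "# Sketch of the pickle handoff between metann_runner.py and the training\n# workers in run_experiments_parallel.sh; this file is not part of the\n# original repo. The keys ('spec', 'index', 'filepath', 'val_loss_avg')\n# follow metann_runner.py and data.py's encode_data; the spec value here is\n# a placeholder rather than a real DARTS architecture.\nimport pickle\n\n# what metann_runner.py writes for a worker to pick up and train\nuntrained = {'spec': 'placeholder_arch', 'index': 0,\n             'filepath': 'untrained_spec_0.pkl'}\nwith open(untrained['filepath'], 'wb') as f:\n    pickle.dump(untrained, f)\n\n# what a worker would write back after training: the same dict plus the\n# validation loss that run_meta_neuralnet later reads via encode_data\ntrained = dict(untrained)\ntrained['val_loss_avg'] = 5.3\nwith open('trained_spec_0.pkl', 'wb') as f:\n    pickle.dump(trained, f)\n\nwith open('trained_spec_0.pkl', 'rb') as f:\n    print(pickle.load(f))\n"
  },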
  {
    "path": "nas_algorithms.py",
    "content": "import itertools\nimport os\nimport pickle\nimport sys\nimport copy\nimport numpy as np\nimport tensorflow as tf\nfrom argparse import Namespace\n\nfrom data import Data\n\n\ndef run_nas_algorithm(algo_params, search_space, mp):\n\n    # run nas algorithm\n    ps = copy.deepcopy(algo_params)\n    algo_name = ps.pop('algo_name')\n\n    if algo_name == 'random':\n        data = random_search(search_space, **ps)\n    elif algo_name == 'evolution':\n        data = evolution_search(search_space, **ps)\n    elif algo_name == 'bananas':\n        data = bananas(search_space, mp, **ps)\n    elif algo_name == 'gp_bayesopt':\n        data = gp_bayesopt_search(search_space, **ps)\n    elif algo_name == 'dngo':\n        data = dngo_search(search_space, **ps)\n    else:\n        print('invalid algorithm name')\n        sys.exit()\n\n    k = 10\n    if 'k' in ps:\n        k = ps['k']\n    total_queries = 150\n    if 'total_queries' in ps:\n        total_queries = ps['total_queries']\n    loss = 'val_loss'\n    if 'loss' in ps:\n        loss = ps['loss']\n\n    return compute_best_test_losses(data, k, total_queries, loss), data\n\n\ndef compute_best_test_losses(data, k, total_queries, loss):\n    \"\"\"\n    Given full data from a completed nas algorithm,\n    output the test error of the arch with the best val error \n    after every multiple of k\n    \"\"\"\n    results = []\n    for query in range(k, total_queries + k, k):\n        best_arch = sorted(data[:query], key=lambda i:i[loss])[0]\n        test_error = best_arch['test_loss']\n        results.append((query, test_error))\n\n    return results\n\n\ndef random_search(search_space,\n                  total_queries=150, \n                  loss='val_loss',\n                  deterministic=True,\n                  verbose=1):\n    \"\"\" \n    random search\n    \"\"\"\n    data = search_space.generate_random_dataset(num=total_queries, \n                                                encoding_type='adj',\n                                                deterministic_loss=deterministic)\n    \n    if verbose:\n        top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]\n        print('random, query {}, top 5 losses {}'.format(total_queries, top_5_loss))    \n    return data\n\n\ndef evolution_search(search_space,\n                     total_queries=150,\n                     num_init=10,\n                     k=10,\n                     loss='val_loss',\n                     population_size=30,                       \n                     tournament_size=10,\n                     mutation_rate=1.0, \n                     deterministic=True,\n                     regularize=True,\n                     verbose=1):\n    \"\"\"\n    regularized evolution\n    \"\"\"\n    data = search_space.generate_random_dataset(num=num_init, \n                                                deterministic_loss=deterministic)\n\n    losses = [d[loss] for d in data]\n    query = num_init\n    population = [i for i in range(min(num_init, population_size))]\n\n    while query <= total_queries:\n\n        # evolve the population by mutating the best architecture\n        # from a random subset of the population\n        sample = np.random.choice(population, tournament_size)\n        best_index = sorted([(i, losses[i]) for i in sample], key=lambda i:i[1])[0][0]\n        mutated = search_space.mutate_arch(data[best_index]['spec'],\n                                           mutation_rate=mutation_rate)\n        arch_dict = 
search_space.query_arch(mutated, deterministic=deterministic)\n        data.append(arch_dict)        \n        losses.append(arch_dict[loss])\n        population.append(len(data) - 1)\n\n        # kill the oldest (or worst) from the population\n        if len(population) >= population_size:\n            if regularize:\n                oldest_index = sorted([i for i in population])[0]\n                population.remove(oldest_index)\n            else:\n                worst_index = sorted([(i, losses[i]) for i in population], key=lambda i:i[1])[-1][0]\n                population.remove(worst_index)\n\n        if verbose and (query % k == 0):\n            top_5_loss = sorted([d[loss] for d in data])[:min(5, len(data))]\n            print('evolution, query {}, top 5 losses {}'.format(query, top_5_loss))\n\n        query += 1\n\n    return data\n\n\ndef bananas(search_space, \n            metann_params,\n            num_init=10, \n            k=10, \n            loss='val_loss',\n            total_queries=150, \n            num_ensemble=5, \n            acq_opt_type='mutation',\n            num_arches_to_mutate=1,\n            explore_type='its',\n            encoding_type='trunc_path',\n            cutoff=40,\n            deterministic=True,\n            verbose=1):\n    \"\"\"\n    Bayesian optimization with a neural network model\n    \"\"\"\n    from acquisition_functions import acq_fn\n    from meta_neural_net import MetaNeuralnet\n\n    data = search_space.generate_random_dataset(num=num_init, \n                                                encoding_type=encoding_type, \n                                                cutoff=cutoff,\n                                                deterministic_loss=deterministic)\n\n    query = num_init + k\n\n    while query <= total_queries:\n\n        xtrain = np.array([d['encoding'] for d in data])\n        ytrain = np.array([d[loss] for d in data])\n\n        if (query == num_init + k) and verbose:\n            print('bananas xtrain shape', xtrain.shape)\n            print('bananas ytrain shape', ytrain.shape)\n\n        # get a set of candidate architectures\n        candidates = search_space.get_candidates(data, \n                                                 acq_opt_type=acq_opt_type,\n                                                 encoding_type=encoding_type, \n                                                 cutoff=cutoff,\n                                                 num_arches_to_mutate=num_arches_to_mutate,\n                                                 loss=loss,\n                                                 deterministic_loss=deterministic)\n\n        xcandidates = np.array([c['encoding'] for c in candidates])\n        candidate_predictions = []\n\n        # train an ensemble of neural networks\n        train_error = 0\n        for _ in range(num_ensemble):\n            meta_neuralnet = MetaNeuralnet()\n            train_error += meta_neuralnet.fit(xtrain, ytrain, **metann_params)\n\n            # predict the validation loss of the candidate architectures\n            candidate_predictions.append(np.squeeze(meta_neuralnet.predict(xcandidates)))\n\n            # clear the tensorflow graph\n            tf.reset_default_graph()\n\n        tf.keras.backend.clear_session()\n\n        train_error /= num_ensemble\n        if verbose:\n            print('query {}, Meta neural net train error: {}'.format(query, train_error))\n\n        # compute the acquisition function for all the candidate architectures\n        
candidate_indices = acq_fn(candidate_predictions, explore_type)\n\n        # add the k arches with the minimum acquisition function values\n        for i in candidate_indices[:k]:\n\n            arch_dict = search_space.query_arch(candidates[i]['spec'],\n                                                encoding_type=encoding_type,\n                                                cutoff=cutoff,\n                                                deterministic=deterministic)\n            data.append(arch_dict)\n\n        if verbose:\n            top_5_loss = sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))]\n            print('bananas, query {}, top 5 (loss, epochs): {}'.format(query, top_5_loss))\n            recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]]\n            print('bananas, query {}, most recent 10 (loss, epochs): {}'.format(query, recent_10_loss))\n\n        query += k\n\n    return data\n\n\ndef gp_bayesopt_search(search_space,\n                        num_init=10,\n                        k=10,\n                        total_queries=150,\n                        distance='edit_distance',\n                        deterministic=True,\n                        tmpdir='./temp',\n                        max_iter=200,\n                        mode='single_process',\n                        nppred=1000):\n    \"\"\"\n    Bayesian optimization with a GP prior\n    \"\"\"\n    from bo.bo.probo import ProBO\n\n    # set up the path for auxiliary pickle files\n    if not os.path.exists(tmpdir):\n        os.mkdir(tmpdir)\n    aux_file_path = os.path.join(tmpdir, 'aux.pkl')\n\n    num_iterations = total_queries - num_init\n\n    # black-box function that bayesopt will optimize\n    def fn(arch):\n        return search_space.query_arch(arch, deterministic=deterministic)['val_loss']\n\n    # set all the parameters for the various BayesOpt classes\n    fhp = Namespace(fhstr='object', namestr='train')\n    domp = Namespace(dom_str='list', set_domain_list_auto=True,\n                     aux_file_path=aux_file_path,\n                     distance=distance)\n    modelp = Namespace(kernp=Namespace(ls=3., alpha=1.5, sigma=1e-5),\n                       infp=Namespace(niter=num_iterations, nwarmup=500),\n                       distance=distance, search_space=search_space.get_type())\n    amp = Namespace(am_str='mygpdistmat_ucb', nppred=nppred, modelp=modelp)\n    optp = Namespace(opt_str='rand', max_iter=max_iter)\n    makerp = Namespace(domp=domp, amp=amp, optp=optp)\n    probop = Namespace(niter=num_iterations, fhp=fhp,\n                       makerp=makerp, tmpdir=tmpdir, mode=mode)\n    data = Namespace()\n\n    # Set up initial data\n    init_data = search_space.generate_random_dataset(num=num_init, \n                                                     deterministic_loss=deterministic)\n    data.X = [d['spec'] for d in init_data]\n    data.y = np.array([[d['val_loss']] for d in init_data])\n\n    # initialize aux file\n    pairs = [(data.X[i], data.y[i]) for i in range(len(data.y))]\n    pairs.sort(key=lambda x: x[1])\n    with open(aux_file_path, 'wb') as f:\n        pickle.dump(pairs, f)\n\n    # run Bayesian Optimization\n    bo = ProBO(fn, search_space, aux_file_path, data, probop, True)\n    bo.run_bo()\n\n    # get the validation and test loss for all architectures chosen by BayesOpt\n    results = []\n    for arch in data.X:\n        arch_dict = search_space.query_arch(arch)\n        results.append(arch_dict)\n\n    return 
results\n\n\ndef dngo_search(search_space,\n                num_init=10,\n                k=10,\n                loss='val_loss',\n                total_queries=150,\n                encoding_type='path',\n                cutoff=40,\n                acq_opt_type='mutation',\n                explore_type='ucb',\n                deterministic=True,\n                verbose=True):\n\n    from pybnn import DNGO\n    from acquisition_functions import acq_fn\n\n    # set up initial data\n    data = search_space.generate_random_dataset(num=num_init, \n                                                encoding_type=encoding_type,\n                                                cutoff=cutoff,\n                                                deterministic_loss=deterministic)\n\n    query = num_init + k\n\n    while query <= total_queries:\n\n        # set up data\n        x = np.array([d['encoding'] for d in data])\n        y = np.array([d[loss] for d in data])\n\n        # get a set of candidate architectures\n        candidates = search_space.get_candidates(data, \n                                                 acq_opt_type=acq_opt_type,\n                                                 encoding_type=encoding_type, \n                                                 cutoff=cutoff,\n                                                 deterministic_loss=deterministic)\n\n        xcandidates = np.array([d['encoding'] for d in candidates])\n\n        # train the model\n        model = DNGO(do_mcmc=False)\n        model.train(x, y, do_optimize=True)\n\n        predictions = model.predict(xcandidates)\n        candidate_indices = acq_fn(np.array(predictions), explore_type)\n\n        # add the k arches with the minimum acquisition function values\n        for i in candidate_indices[:k]:\n            arch_dict = search_space.query_arch(candidates[i]['spec'],\n                                                encoding_type=encoding_type,\n                                                cutoff=cutoff,\n                                                deterministic=deterministic)\n            data.append(arch_dict)\n\n        if verbose:\n            top_5_loss = sorted([(d[loss], d['epochs']) for d in data], key=lambda d: d[0])[:min(5, len(data))]\n            print('dngo, query {}, top 5 (val loss, epochs): {}'.format(query, top_5_loss))\n            recent_10_loss = [(d[loss], d['epochs']) for d in data[-10:]]\n            print('dngo, query {}, most recent 10 (val loss, epochs): {}'.format(query, recent_10_loss))\n\n        query += k\n\n    return data\n"
  },
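  {
    "path": "examples/compute_best_test_losses_example.py",
    "content": "# Self-contained sketch of how compute_best_test_losses summarizes a run;\n# this file is not part of the original repo and uses synthetic run data.\n# Importing nas_algorithms pulls in data.py, so the nasbench package must be\n# installed (no dataset file is needed for this example).\nimport os\n\nimport numpy as np\n\nos.environ['search_space'] = 'nasbench'\nfrom nas_algorithms import compute_best_test_losses\n\nrng = np.random.RandomState(0)\ndata = [{'val_loss': v, 'test_loss': v + rng.uniform(-0.5, 0.5)}\n        for v in rng.uniform(5, 15, size=30)]\n\n# test loss of the best-validation-loss arch after every multiple of k queries\nfor query, test_loss in compute_best_test_losses(data, k=10, total_queries=30, loss='val_loss'):\n    print('query {}: test loss {:.3f}'.format(query, test_loss))\n"
  },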
  {
    "path": "nas_bench/__init__.py",
    "content": "\n"
  },
  {
    "path": "nas_bench/cell.py",
    "content": "import numpy as np\nimport copy\nimport itertools\nimport random\nimport sys\nimport os\nimport pickle\n\nfrom nasbench import api\n\n\nINPUT = 'input'\nOUTPUT = 'output'\nCONV3X3 = 'conv3x3-bn-relu'\nCONV1X1 = 'conv1x1-bn-relu'\nMAXPOOL3X3 = 'maxpool3x3'\nOPS = [CONV3X3, CONV1X1, MAXPOOL3X3]\n\nNUM_VERTICES = 7\nOP_SPOTS = NUM_VERTICES - 2\nMAX_EDGES = 9\n\n\nclass Cell:\n\n    def __init__(self, matrix, ops):\n\n        self.matrix = matrix\n        self.ops = ops\n\n    def serialize(self):\n        return {\n            'matrix': self.matrix,\n            'ops': self.ops\n        }\n\n    def modelspec(self):\n        return api.ModelSpec(matrix=self.matrix, ops=self.ops)\n\n    @classmethod\n    def random_cell(cls, nasbench):\n        \"\"\" \n        From the NASBench repository \n\n        one-hot adjacency matrix\n        draw [0,1] for each slot in the adjacency matrix\n        \"\"\"\n        while True:\n            matrix = np.random.choice(\n                [0, 1], size=(NUM_VERTICES, NUM_VERTICES))\n            matrix = np.triu(matrix, 1)\n            ops = np.random.choice(OPS, size=NUM_VERTICES).tolist()\n            ops[0] = INPUT\n            ops[-1] = OUTPUT\n            spec = api.ModelSpec(matrix=matrix, ops=ops)\n            if nasbench.is_valid(spec):\n                return {\n                    'matrix': matrix,\n                    'ops': ops\n                }\n\n    def get_val_loss(self, nasbench, deterministic=1, patience=50, epochs=None, dataset=None):\n        if not deterministic:\n            # output one of the three validation accuracies at random\n            if epochs:\n                return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy']))\n            else:\n                return (100*(1 - nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy']))\n        else:        \n            # query the api until we see all three accuracies, then average them\n            # a few architectures only have two accuracies, so we use patience to avoid an infinite loop\n            accs = []\n            while len(accs) < 3 and patience > 0:\n                patience -= 1\n                if epochs:\n                    acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['validation_accuracy']\n                else:\n                    acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['validation_accuracy']\n                if acc not in accs:\n                    accs.append(acc)\n            return round(100*(1-np.mean(accs)), 4)            \n\n\n    def get_test_loss(self, nasbench, patience=50, epochs=None, dataset=None):\n        \"\"\"\n        query the api until we see all three accuracies, then average them\n        a few architectures only have two accuracies, so we use patience to avoid an infinite loop\n        \"\"\"\n        accs = []\n        while len(accs) < 3 and patience > 0:\n            patience -= 1\n            if epochs:\n                acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops), epochs=epochs)['test_accuracy']\n            else:\n                acc = nasbench.query(api.ModelSpec(matrix=self.matrix, ops=self.ops))['test_accuracy']\n            if acc not in accs:\n                accs.append(acc)\n        return round(100*(1-np.mean(accs)), 4)\n\n    def get_num_params(self, nasbench):\n        return nasbench.query(api.ModelSpec(matrix=self.matrix, 
ops=self.ops))['trainable_parameters']\n\n    def perturb(self, nasbench, edits=1):\n        \"\"\" \n        create new perturbed cell \n        inspired by https://github.com/google-research/nasbench\n        \"\"\"\n        new_matrix = copy.deepcopy(self.matrix)\n        new_ops = copy.deepcopy(self.ops)\n        for _ in range(edits):\n            while True:\n                if np.random.random() < 0.5:\n                    # flip one edge chosen at random\n                    src = np.random.randint(0, NUM_VERTICES - 1)\n                    dst = np.random.randint(src + 1, NUM_VERTICES)\n                    new_matrix[src][dst] = 1 - new_matrix[src][dst]\n                else:\n                    # change one op chosen at random\n                    ind = np.random.randint(1, NUM_VERTICES - 1)\n                    available = [op for op in OPS if op != new_ops[ind]]\n                    new_ops[ind] = np.random.choice(available)\n\n                new_spec = api.ModelSpec(new_matrix, new_ops)\n                if nasbench.is_valid(new_spec):\n                    break\n        return {\n            'matrix': new_matrix,\n            'ops': new_ops\n        }\n\n    def mutate(self, \n               nasbench, \n               mutation_rate=1.0, \n               patience=5000):\n        \"\"\"\n        A stochastic approach to perturbing the cell\n        inspired by https://github.com/google-research/nasbench\n        \"\"\"\n        p = 0\n        while p < patience:\n            p += 1\n            new_matrix = copy.deepcopy(self.matrix)\n            new_ops = copy.deepcopy(self.ops)\n\n            edge_mutation_prob = mutation_rate / (NUM_VERTICES * (NUM_VERTICES - 1) / 2)\n            # flip each edge with probability edge_mutation_prob, so the\n            # expected number of edge flips is mutation_rate; same for the ops\n            for src in range(0, NUM_VERTICES - 1):\n                for dst in range(src + 1, NUM_VERTICES):\n                    if random.random() < edge_mutation_prob:\n                        new_matrix[src, dst] = 1 - new_matrix[src, dst]\n\n            op_mutation_prob = mutation_rate / OP_SPOTS\n            for ind in range(1, OP_SPOTS + 1):\n                if random.random() < op_mutation_prob:\n                    available = [o for o in OPS if o != new_ops[ind]]\n                    new_ops[ind] = random.choice(available)\n\n            new_spec = api.ModelSpec(new_matrix, new_ops)\n            if nasbench.is_valid(new_spec):\n                return {\n                    'matrix': new_matrix,\n                    'ops': new_ops\n                }\n        return self.mutate(nasbench, mutation_rate+1)\n\n    def encode_standard(self):\n        \"\"\" \n        compute the \"standard\" encoding,\n        i.e. 
adjacency matrix + op list encoding \n        \"\"\"\n        encoding_length = (NUM_VERTICES ** 2 - NUM_VERTICES) // 2 + OP_SPOTS\n        encoding = np.zeros((encoding_length))\n        dic = {CONV1X1: 0., CONV3X3: 0.5, MAXPOOL3X3: 1.0}\n        n = 0\n        for i in range(NUM_VERTICES - 1):\n            for j in range(i+1, NUM_VERTICES):\n                encoding[n] = self.matrix[i][j]\n                n += 1\n        for i in range(1, NUM_VERTICES - 1):\n            encoding[-i] = dic[self.ops[i]]\n        return tuple(encoding)\n\n    def get_paths(self):\n        \"\"\" \n        return all paths from input to output\n        \"\"\"\n        paths = []\n        for j in range(0, NUM_VERTICES):\n            if self.matrix[0][j]:\n                paths.append([[]])\n            else:\n                paths.append([])\n\n        # create paths sequentially\n        for i in range(1, NUM_VERTICES - 1):\n            for j in range(1, NUM_VERTICES):\n                if self.matrix[i][j]:\n                    for path in paths[i]:\n                        paths[j].append([*path, self.ops[i]])\n        return paths[-1]\n\n    def get_path_indices(self):\n        \"\"\"\n        compute the index of each path\n        There are 3^0 + ... + 3^5 paths total.\n        (Paths can be length 0 to 5, and for each path, for each node, there\n        are three choices for the operation.)\n        \"\"\"\n        paths = self.get_paths()\n        mapping = {CONV3X3: 0, CONV1X1: 1, MAXPOOL3X3: 2}\n        path_indices = []\n\n        for path in paths:\n            index = 0\n            for i in range(NUM_VERTICES - 1):\n                if i == len(path):\n                    path_indices.append(index)\n                    break\n                else:\n                    index += len(OPS) ** i * (mapping[path[i]] + 1)\n\n        path_indices.sort()\n        return tuple(path_indices)\n\n    def encode_paths(self):\n        \"\"\" output one-hot encoding of paths \"\"\"\n        num_paths = sum([len(OPS) ** i for i in range(OP_SPOTS + 1)])\n        path_indices = self.get_path_indices()\n        encoding = np.zeros(num_paths)\n        for index in path_indices:\n            encoding[index] = 1\n        return encoding\n\n    def path_distance(self, other):\n        \"\"\" \n        compute the distance between two architectures\n        by comparing their path encodings\n        \"\"\"\n        return np.sum(np.array(self.encode_paths()) != np.array(other.encode_paths()))\n\n    def trunc_path_distance(self, other, cutoff=40):\n        \"\"\" \n        compute the distance between two architectures\n        by comparing their truncated path encodings\n        \"\"\"\n        encoding = self.encode_paths()[:cutoff]\n        other_encoding = other.encode_paths()[:cutoff]\n        return np.sum(np.array(encoding) != np.array(other_encoding))\n\n    def edit_distance(self, other):\n        \"\"\"\n        compute the distance between two architectures\n        by comparing their adjacency matrices and op lists\n        \"\"\"\n        graph_dist = np.sum(np.array(self.matrix) != np.array(other.matrix))\n        ops_dist = np.sum(np.array(self.ops) != np.array(other.ops))\n        return (graph_dist + ops_dist)\n\n    def nasbot_distance(self, other):\n        # distance based on optimal transport between row sums, column sums, and ops\n\n        row_sums = sorted(np.array(self.matrix).sum(axis=0))\n        col_sums = sorted(np.array(self.matrix).sum(axis=1))\n\n        other_row_sums = sorted(np.array(other.matrix).sum(axis=0))\n        other_col_sums = 
sorted(np.array(other.matrix).sum(axis=1))\n\n        row_dist = np.sum(np.abs(np.subtract(row_sums, other_row_sums)))\n        col_dist = np.sum(np.abs(np.subtract(col_sums, other_col_sums)))\n\n        counts = [self.ops.count(op) for op in OPS]\n        other_counts = [other.ops.count(op) for op in OPS]\n\n        ops_dist = np.sum(np.abs(np.subtract(counts, other_counts)))\n\n        return (row_dist + col_dist + ops_dist)\n\n"
  },
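  {
    "path": "examples/nasbench_cell_example.py",
    "content": "# Worked example of the path-based encoding in nas_bench/cell.py; this file\n# is not part of the original repo, and the chain-shaped cell below is an\n# arbitrary assumption. Importing Cell pulls in the nasbench package, but no\n# dataset file is needed for the encodings themselves.\nimport numpy as np\n\nfrom nas_bench.cell import (Cell, INPUT, OUTPUT, CONV1X1, CONV3X3,\n                            MAXPOOL3X3)\n\n# a simple chain: input -> conv3x3 -> conv1x1 -> conv3x3 -> conv1x1\n#                 -> maxpool3x3 -> output\nmatrix = np.array([[0, 1, 0, 0, 0, 0, 0],\n                   [0, 0, 1, 0, 0, 0, 0],\n                   [0, 0, 0, 1, 0, 0, 0],\n                   [0, 0, 0, 0, 1, 0, 0],\n                   [0, 0, 0, 0, 0, 1, 0],\n                   [0, 0, 0, 0, 0, 0, 1],\n                   [0, 0, 0, 0, 0, 0, 0]])\nops = [INPUT, CONV3X3, CONV1X1, CONV3X3, CONV1X1, MAXPOOL3X3, OUTPUT]\n\ncell = Cell(matrix, ops)\n# each path is the list of ops along a route from input to output;\n# this chain has exactly one path\nprint('paths:', cell.get_paths())\n# each path maps to a unique index in [0, 3^0 + ... + 3^5)\nprint('path indices:', cell.get_path_indices())\n# the path encoding is a one-hot vector over all 364 possible paths\nprint('number of paths set:', int(cell.encode_paths().sum()))\n"
  },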
  {
    "path": "nas_bench_201/__init__.py",
    "content": "\n"
  },
  {
    "path": "nas_bench_201/cell.py",
    "content": "import numpy as np\nimport copy\nimport itertools\nimport random\nimport sys\nimport os\nimport pickle\n\n\nOPS = ['avg_pool_3x3', 'nor_conv_1x1', 'nor_conv_3x3', 'none', 'skip_connect']\nNUM_OPS = len(OPS)\nOP_SPOTS = 6\nLONGEST_PATH_LENGTH = 3\n\nclass Cell:\n\n    def __init__(self, string):\n        self.string = string\n\n    def get_string(self):\n        return self.string\n\n    def serialize(self):\n        return {\n            'string':self.string\n        }\n\n    @classmethod\n    def random_cell(cls, nasbench, max_nodes=4):\n        \"\"\"\n        From the AutoDL-Projects repository\n        \"\"\"\n        ops = []\n        for i in range(OP_SPOTS):\n            op = random.choice(OPS)\n            ops.append(op)\n        return {'string':cls.get_string_from_ops(ops)}\n\n\n    def get_runtime(self, nasbench, dataset='cifar100'):\n        return nasbench.query_by_index(index, dataset).get_eval('x-valid')['time']\n\n    def get_val_loss(self, nasbench, deterministic=1, dataset='cifar100'):\n        index = nasbench.query_index_by_arch(self.string)\n        if dataset == 'cifar10':\n            results = nasbench.query_by_index(index, 'cifar10-valid')\n        else:\n            results = nasbench.query_by_index(index, dataset)\n\n        accs = []\n        for key in results.keys():\n            accs.append(results[key].get_eval('x-valid')['accuracy'])\n\n        if deterministic:\n            return round(100-np.mean(accs), 10)   \n        else:\n            return round(100-np.random.choice(accs), 10)\n\n    def get_test_loss(self, nasbench, dataset='cifar100', deterministic=1):\n        index = nasbench.query_index_by_arch(self.string)\n        results = nasbench.query_by_index(index, dataset)\n\n        accs = []\n        for key in results.keys():\n            accs.append(results[key].get_eval('ori-test')['accuracy'])\n\n        if deterministic:\n            return round(100-np.mean(accs), 4)   \n        else:\n            return round(100-np.random.choice(accs), 4)\n\n    def get_op_list(self):\n        # given a string, get the list of operations\n        tokens = self.string.split('|')\n        ops = [t.split('~')[0] for i,t in enumerate(tokens) if i not in [0,2,5,9]]\n        return ops\n\n    def get_num(self):\n        # compute the unique number of the architecture, in [0, 15624]\n        ops = self.get_op_list()\n        index = 0\n        for i, op in enumerate(ops):\n            index += OPS.index(op) * NUM_OPS ** i\n        return index\n\n    @classmethod\n    def get_string_from_ops(cls, ops):\n        # given a list of operations, get the string\n        strings = ['|']\n        nodes = [0, 0, 1, 0, 1, 2]\n        for i, op in enumerate(ops):\n            strings.append(op+'~{}|'.format(nodes[i]))\n            if i < len(nodes) - 1 and nodes[i+1] == 0:\n                strings.append('+|')\n        return ''.join(strings)\n\n    def perturb(self, nasbench,\n                mutation_rate=1):\n        # more deterministic version of mutate\n        ops = self.get_op_list()\n        new_ops = []\n        num = np.random.choice(len(ops))\n        for i, op in enumerate(ops):\n            if i == num:\n                available = [o for o in OPS if o != op]\n                new_ops.append(np.random.choice(available))\n            else:\n                new_ops.append(op)\n        return {'string':self.get_string_from_ops(new_ops)}\n\n    def mutate(self, \n               nasbench, \n               mutation_rate=1.0, \n               
patience=5000):\n\n        # patience is unused here; kept for signature consistency with nas_bench\n        ops = self.get_op_list()\n        new_ops = []\n        # keeping mutation_prob consistent with nasbench_101\n        mutation_prob = mutation_rate / (OP_SPOTS - 2)\n\n        for i, op in enumerate(ops):\n            if random.random() < mutation_prob:\n                available = [o for o in OPS if o != op]\n                new_ops.append(random.choice(available))\n            else:\n                new_ops.append(op)\n\n        return {'string':self.get_string_from_ops(new_ops)}\n\n    def encode_standard(self):\n        \"\"\" \n        compute the standard encoding\n        \"\"\"\n        ops = self.get_op_list()\n        encoding = []\n        for op in ops:\n            encoding.append(OPS.index(op))\n\n        return encoding\n\n    def get_num_params(self, nasbench):\n        # todo update to the newer nasbench-201 dataset\n        return 100\n\n    def get_paths(self):\n        \"\"\" \n        return all paths from input to output\n        \"\"\"\n        path_blueprints = [[3], [0,4], [1,5], [0,2,5]]\n        ops = self.get_op_list()\n        paths = []\n        for blueprint in path_blueprints:\n            paths.append([ops[node] for node in blueprint])\n\n        return paths\n\n    def get_path_indices(self):\n        \"\"\"\n        compute the index of each path\n        \"\"\"\n        paths = self.get_paths()\n        path_indices = []\n\n        for i, path in enumerate(paths):\n            if i == 0:\n                index = 0\n            elif i in [1, 2]:\n                index = NUM_OPS\n            else:\n                index = NUM_OPS + NUM_OPS ** 2\n            for j, op in enumerate(path):\n                index += OPS.index(op) * NUM_OPS ** j\n            path_indices.append(index)\n\n        return tuple(path_indices)\n\n    def encode_paths(self):\n        \"\"\" output one-hot encoding of paths \"\"\"\n        num_paths = sum([NUM_OPS ** i for i in range(1, LONGEST_PATH_LENGTH + 1)])\n        path_indices = self.get_path_indices()\n        encoding = np.zeros(num_paths)\n        for index in path_indices:\n            encoding[index] = 1\n        return encoding\n\n    def path_distance(self, other):\n        \"\"\" \n        compute the distance between two architectures\n        by comparing their path encodings\n        \"\"\"\n        return np.sum(np.array(self.encode_paths()) != np.array(other.encode_paths()))\n\n    def trunc_path_distance(self, other, cutoff=30):\n        \"\"\" \n        compute the distance between two architectures\n        by comparing their truncated path encodings\n        \"\"\"\n        paths = np.array(self.encode_paths()[:cutoff])\n        other_paths = np.array(other.encode_paths()[:cutoff])\n        return np.sum(paths != other_paths)\n\n    def edit_distance(self, other):\n\n        ops = self.get_op_list()\n        other_ops = other.get_op_list()\n        return np.sum([1 for i in range(len(ops)) if ops[i] != other_ops[i]])\n\n    def nasbot_distance(self, other):\n        # distance based on optimal transport between row sums, column sums, and ops\n\n        ops = self.get_op_list()\n        other_ops = other.get_op_list()\n\n        counts = [ops.count(op) for op in OPS]\n        other_counts = [other_ops.count(op) for op in OPS]\n        ops_dist = np.sum(np.abs(np.subtract(counts, other_counts)))\n\n        return ops_dist + self.edit_distance(other)\n"
  },
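  {
    "path": "examples/nasbench_201_cell_example.py",
    "content": "# Sketch of the NAS-Bench-201 cell string format used in\n# nas_bench_201/cell.py; this file is not part of the original repo, and the\n# op choices below are arbitrary. No dataset file is needed here.\nfrom nas_bench_201.cell import Cell\n\n# six op choices: one edge into node 1, two into node 2, three into node 3\nops = ['nor_conv_3x3', 'skip_connect', 'nor_conv_1x1',\n       'none', 'avg_pool_3x3', 'nor_conv_3x3']\nstring = Cell.get_string_from_ops(ops)\nprint(string)\n\ncell = Cell(string)\nassert cell.get_op_list() == ops           # the string round-trips to the ops\nprint('arch number:', cell.get_num())      # unique index in [0, 5**6)\nprint('path encoding length:', len(cell.encode_paths()))  # 5 + 25 + 125 = 155\n"
  },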
  {
    "path": "params.py",
    "content": "import sys\n\n\ndef algo_params(param_str):\n    \"\"\"\n      Return params list based on param_str.\n      These are the parameters used to produce the figures in the paper\n      For AlphaX and Reinforcement Learning, we used the corresponding github repos:\n      https://github.com/linnanwang/AlphaX-NASBench101\n      https://github.com/automl/nas_benchmarks\n    \"\"\"\n    params = []\n\n    if param_str == 'test':\n        params.append({'algo_name':'random', 'total_queries':30})\n        params.append({'algo_name':'evolution', 'total_queries':30})\n        params.append({'algo_name':'bananas', 'total_queries':30})   \n        params.append({'algo_name':'gp_bayesopt', 'total_queries':30})\n        params.append({'algo_name':'dngo', 'total_queries':30})\n\n    elif param_str == 'test_simple': \n        params.append({'algo_name':'random', 'total_queries':30})\n        params.append({'algo_name':'evolution', 'total_queries':30})\n\n    elif param_str == 'random': \n        params.append({'algo_name':'random', 'total_queries':10})\n\n    elif param_str == 'bananas':\n        params.append({'algo_name':'bananas', 'total_queries':150, 'verbose':0})\n\n    elif param_str == 'main_experiments':\n        params.append({'algo_name':'random', 'total_queries':150})\n        params.append({'algo_name':'evolution', 'total_queries':150})\n        params.append({'algo_name':'bananas', 'total_queries':150})  \n        params.append({'algo_name':'gp_bayesopt', 'total_queries':150})        \n        params.append({'algo_name':'dngo', 'total_queries':150})\n\n    elif param_str == 'ablation':\n        params.append({'algo_name':'bananas', 'total_queries':150})   \n        params.append({'algo_name':'bananas', 'total_queries':150, 'encoding_type':'adjacency'})\n        params.append({'algo_name':'gp_bayesopt', 'total_queries':150, 'distance':'path_distance'})\n        params.append({'algo_name':'gp_bayesopt', 'total_queries':150, 'distance':'edit_distance'})\n        params.append({'algo_name':'bananas', 'total_queries':150, 'acq_opt_type':'random'})\n\n    else:\n        print('invalid algorithm params: {}'.format(param_str))\n        sys.exit()\n\n    print('\\n* Running experiment: ' + param_str)\n    return params\n\n\ndef meta_neuralnet_params(param_str):\n\n    if param_str == 'nasbench':\n        params = {'search_space':'nasbench', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \\\n            'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\n\n    elif param_str == 'darts':\n        params = {'search_space':'darts', 'dataset':'cifar10', 'loss':'mape', 'num_layers':10, 'layer_width':20, \\\n            'epochs':10000, 'batch_size':32, 'lr':.00001, 'regularization':0, 'verbose':0}\n\n    elif param_str == 'nasbench_201_cifar10':\n        params = {'search_space':'nasbench_201', 'dataset':'cifar10', 'loss':'mae', 'num_layers':10, 'layer_width':20, \\\n            'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\n\n    elif param_str == 'nasbench_201_cifar100':\n        params = {'search_space':'nasbench_201', 'dataset':'cifar100', 'loss':'mae', 'num_layers':10, 'layer_width':20, \\\n            'epochs':150, 'batch_size':32, 'lr':.01, 'regularization':0, 'verbose':0}\n\n    elif param_str == 'nasbench_201_imagenet':\n        params = {'search_space':'nasbench_201', 'dataset':'ImageNet16-120', 'loss':'mae', 'num_layers':10, 'layer_width':20, \\\n            'epochs':150, 'batch_size':32, 'lr':.01, 
'regularization':0, 'verbose':0}\n\n    else:\n        print('invalid meta neural net params: {}'.format(param_str))\n        sys.exit()\n\n    return params\n"
  },
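  {
    "path": "examples/params_example.py",
    "content": "# Tiny sketch of how the helpers in params.py feed\n# run_experiments_sequential.py; this file is not part of the original repo.\nfrom params import algo_params, meta_neuralnet_params\n\n# one dict per NAS algorithm to run in each trial\nfor p in algo_params('test_simple'):\n    print(p)\n\n# meta neural net settings; run_experiments_sequential.py pops\n# 'search_space' and 'dataset' and passes the rest to MetaNeuralnet.fit\nmp = meta_neuralnet_params('nasbench')\nprint(mp['search_space'], mp['dataset'], mp['loss'])\n"
  },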
  {
    "path": "run_experiments_parallel.sh",
    "content": "\nparam_str=fifty_epochs\nexperiment_name=bananas\n\n# set all instance names and zones\ninstances=(bananas-t4-1-vm bananas-t4-2-vm bananas-t4-3-vm bananas-t4-4-vm \\\n\tbananas-t4-5-vm bananas-t4-6-vm bananas-t4-7-vm bananas-t4-8-vm \\\n\tbananas-t4-9-vm bananas-t4-10-vm)\n\nzones=(us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b us-west1-b \\\n\tus-west1-b us-west1-b us-west1-b us-west1-b)\n\n# set parameters based on the param string\nif [ $param_str = test ]; then\n\tstart_iteration=0\n\tend_iteration=1\n\tk=10\n\tuntrained_filename=untrained_spec\n\ttrained_filename=trained_spec\n\tepochs=1\nfi\nif [ $param_str = fifty_epochs ]; then\n\tstart_iteration=0\n\tend_iteration=9\n\tk=10\n\tuntrained_filename=untrained_spec\n\ttrained_filename=trained_spec\n\tepochs=50\nfi\n\n# start bananas\nfor i in $(seq $start_iteration $end_iteration)\ndo \n\tlet start=$i*$k\n\tlet end=($i+1)*$k-1\n\n\t# train the neural net\n\t# input: all pickle files with index from 0 to i*k-1\n\t# output: k pickle files for the architectures to train next (indices i*k to (i+1)*k-1)\n\techo about to run meta neural network in iteration $i\n\tpython3 metann_runner.py --experiment_name $experiment_name --params $nas_params --k $k \\\n\t\t--untrained_filename $untrained_filename --trained_filename $trained_filename --query $start\n\techo outputted architectures to train in iteration $i\n\n\t# train the k architectures\n\tlet max_j=$k-1\n\tfor j in $(seq 0 $max_j )\n\tdo\n\t\tlet query=$i*$k+$j\n\t\tinstance=${instances[$j]}\n\t\tzone=${zones[$j]}\n\t\tuntrained_filepath=$experiment_name/$untrained_filename\\_$query.pkl\n\t\ttrained_filepath=$experiment_name/$trained_filename\\_$query.pkl\n\n\t\techo about to copy file $untrained_filepath to instance $instance\n\t\tgcloud compute scp $untrained_filepath $instance:~/naszilla/$experiment_name/ --zone $zone\n\n\t\techo about to ssh into instance $instance\n\t\tgcloud compute ssh $instance --zone $zone --command=\"cd naszilla; \\ \n\t\tpython3 train_arch_runner.py --untrained_filepath $untrained_filepath \\\n\t\t--trained_filepath $trained_filepath --epochs $epochs\" &\n\tdone\n\twait\n\techo all architectures trained in iteration $i\n\n\t# copy results of trained architectures to the master CPU\n\tlet max_j=$k-1\n\tfor j in $(seq 0 $max_j )\n\tdo\n\t\tlet query=$i*$k+$j\n\t\tinstance=${instances[$j]}\n\t\tzone=${zones[$j]}\n\t\ttrained_filepath=$experiment_name/$trained_filename\\_$query.pkl\n\t\tgcloud compute scp $instance:~/naszilla/$trained_filepath $experiment_name --zone $zone\n\tdone\n\techo finished iteration $i\ndone\n\n"
  },
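  {
    "path": "examples/parallel_pickle_sketch.py",
    "content": "# Hypothetical inspection sketch, not an original repo file: it summarizes the\n# pickle handoff that run_experiments_parallel.sh orchestrates. The paths\n# follow that script's conventions (experiment_name/untrained_spec_<q>.pkl and\n# experiment_name/trained_spec_<q>.pkl); the dict keys are the ones read and\n# written by train_arch_runner.py.\nimport os\nimport pickle\n\nexperiment_name = 'bananas'  # matches experiment_name in the shell script\nquery = 0                    # global architecture index: i*k + j\n\nuntrained_path = os.path.join(experiment_name, 'untrained_spec_{}.pkl'.format(query))\ntrained_path = os.path.join(experiment_name, 'trained_spec_{}.pkl'.format(query))\n\n# the untrained pickle holds a dict whose 'spec' entry is the arch to train\nwith open(untrained_path, 'rb') as f:\n    untrained = pickle.load(f)\nprint('spec to train:', untrained['spec'])\n\n# after a worker VM runs train_arch_runner.py, the trained pickle holds the\n# same dict plus 'val_loss', 'test_loss', 'val_loss_avg', and per-epoch losses\nif os.path.exists(trained_path):\n    with open(trained_path, 'rb') as f:\n        trained = pickle.load(f)\n    print('val loss: {}, test loss: {}'.format(trained['val_loss'], trained['test_loss']))\n"
  },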
  {
    "path": "run_experiments_sequential.py",
    "content": "import argparse\nimport time\nimport logging\nimport sys\nimport os\nimport pickle\nimport numpy as np\nimport copy\n\nfrom params import *\n\n\ndef run_experiments(args, save_dir):\n\n    os.environ['search_space'] = args.search_space\n\n    from nas_algorithms import run_nas_algorithm\n    from data import Data\n\n    trials = args.trials\n    out_file = args.output_filename\n    save_specs = args.save_specs\n    metann_params = meta_neuralnet_params(args.search_space)\n    algorithm_params = algo_params(args.algo_params)\n    num_algos = len(algorithm_params)\n    logging.info(algorithm_params)\n\n    # set up search space\n    mp = copy.deepcopy(metann_params)\n    ss = mp.pop('search_space')\n    dataset = mp.pop('dataset')\n    search_space = Data(ss, dataset=dataset)\n\n    for i in range(trials):\n        results = []\n        walltimes = []\n        run_data = []\n\n        for j in range(num_algos):\n            # run NAS algorithm\n            print('\\n* Running algorithm: {}'.format(algorithm_params[j]))\n            starttime = time.time()\n            algo_result, run_datum = run_nas_algorithm(algorithm_params[j], search_space, mp)\n            algo_result = np.round(algo_result, 5)\n\n            # remove unnecessary dict entries that take up space\n            for d in run_datum:\n                if not save_specs:\n                    d.pop('spec')\n                for key in ['encoding', 'adjacency', 'path', 'dist_to_min']:\n                    if key in d:\n                        d.pop(key)\n\n            # add walltime, results, run_data\n            walltimes.append(time.time()-starttime)\n            results.append(algo_result)\n            run_data.append(run_datum)\n\n        # print and pickle results\n        filename = os.path.join(save_dir, '{}_{}.pkl'.format(out_file, i))\n        print('\\n* Trial summary: (params, results, walltimes)')\n        print(algorithm_params)\n        print(metann_params)\n        print(results)\n        print(walltimes)\n        print('\\n* Saving to file {}'.format(filename))\n        with open(filename, 'wb') as f:\n            pickle.dump([algorithm_params, metann_params, results, walltimes, run_data], f)\n            f.close()\n\ndef main(args):\n\n    # make save directory\n    save_dir = args.save_dir\n    if not os.path.exists(save_dir):\n        os.mkdir(save_dir)\n\n    algo_params = args.algo_params\n    save_path = save_dir + '/' + algo_params + '/'\n    if not os.path.exists(save_path):\n        os.mkdir(save_path)\n\n    # set up logging\n    log_format = '%(asctime)s %(message)s'\n    logging.basicConfig(stream=sys.stdout, level=logging.INFO,\n        format=log_format, datefmt='%m/%d %I:%M:%S %p')\n    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))\n    fh.setFormatter(logging.Formatter(log_format))\n    logging.getLogger().addHandler(fh)\n    logging.info(args)\n\n    run_experiments(args, save_path)\n    \n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description='Args for BANANAS experiments')\n    parser.add_argument('--trials', type=int, default=500, help='Number of trials')\n    parser.add_argument('--search_space', type=str, default='nasbench', \\\n        help='nasbench or darts')\n    parser.add_argument('--algo_params', type=str, default='main_experiments', help='which parameters to use')\n    parser.add_argument('--output_filename', type=str, default='round', help='name of output files')\n    parser.add_argument('--save_dir', type=str, 
default='results_output', help='name of save directory')\n    parser.add_argument('--save_specs', type=bool, default=False, help='save the architecture specs')    \n\n    args = parser.parse_args()\n    main(args)\n"
  },
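  {
    "path": "examples/load_results_sketch.py",
    "content": "# Hypothetical analysis sketch, not an original repo file: it loads one trial\n# pickle written by run_experiments_sequential.py. The path below assumes the\n# default flags (--save_dir results_output --algo_params main_experiments\n# --output_filename round) and trial index 0; adjust it for other runs.\nimport pickle\n\nwith open('results_output/main_experiments/round_0.pkl', 'rb') as f:\n    algorithm_params, metann_params, results, walltimes, run_data = pickle.load(f)\n\n# results[j] and walltimes[j] correspond to algorithm_params[j]\nfor params, result, walltime in zip(algorithm_params, results, walltimes):\n    print('{} ({:.1f}s):'.format(params['algo_name'], walltime))\n    print(result)\n"
  },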
  {
    "path": "train_arch_runner.py",
    "content": "import argparse\nimport time\nimport logging\nimport sys\nimport os\nimport pickle\n\nsys.path.append(os.path.expanduser('~/darts/cnn'))\nfrom train_class import Train\n\n\"\"\"\ntrain arch runner is used in run_experiments_parallel\n\n - loads data by opening a pickle file containing an architecture spec\n - trains that architecture for e epochs\n - outputs a new pickle file with the architecture spec and its validation loss\n\"\"\"\n\ndef run(args):\n\n    untrained_filepath = os.path.expanduser(args.untrained_filepath)\n    trained_filepath = os.path.expanduser(args.trained_filepath)\n    epochs = args.epochs\n    gpu = args.gpu\n    train_portion = args.train_portion\n    seed = args.seed\n    save = args.save\n\n    # load the arch spec that will be trained\n    dic = pickle.load(open(untrained_filepath, 'rb'))\n    arch = dic['spec']\n    print('loaded arch', arch)\n\n    # train the arch\n    trainer = Train()\n    val_accs, test_accs = trainer.main(arch, \n                                        epochs=epochs, \n                                        gpu=gpu, \n                                        train_portion=train_portion, \n                                        seed=seed, \n                                        save=save)\n\n    val_sum = 0\n    for epoch, val_acc in val_accs:\n        key = 'val_loss_' + str(epoch)\n        dic[key] = 100 - val_acc\n        val_sum += dic[key]\n    for epoch, test_acc in test_accs:\n        key = 'test_loss_' + str(epoch)\n        dic[key] = 100 - test_acc\n\n    val_loss_avg = val_sum / len(val_accs)\n\n    dic['val_loss_avg'] = val_loss_avg\n    dic['val_loss'] = 100 - val_accs[-1][-1]\n    dic['test_loss'] = 100 - test_accs[-1][-1]\n    dic['filepath'] = args.trained_filepath\n\n    print('arch {}'.format(arch))\n    print('val loss: {}'.format(dic['val_loss']))\n    print('test loss: {}'.format(dic['test_loss']))\n    print('val loss avg: {}'.format(dic['val_loss_avg']))\n\n    with open(trained_filepath, 'wb') as f:\n        pickle.dump(dic, f)\n\ndef main(args):\n\n    #set up save dir\n    save_dir = './'\n\n    #set up logging\n    log_format = '%(asctime)s %(message)s'\n    logging.basicConfig(stream=sys.stdout, level=logging.INFO,\n        format=log_format, datefmt='%m/%d %I:%M:%S %p')\n    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))\n    fh.setFormatter(logging.Formatter(log_format))\n    logging.getLogger().addHandler(fh)\n    logging.info(args)\n\n    run(args)\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description='Args for training a darts arch')\n    parser.add_argument('--untrained_filepath', type=str, default='darts_test/untrained_spec_0.pkl', help='name of input files')\n    parser.add_argument('--trained_filepath', type=str, default='darts_test/trained_spec_0.pkl', help='name of output files')\n    parser.add_argument('--epochs', type=int, default=50, help='number of training epochs')\n    parser.add_argument('--gpu', type=int, default=0, help='which gpu to use')\n    parser.add_argument('--train_portion', type=float, default=0.7, help='portion of training data used for training')\n    parser.add_argument('--seed', type=float, default=0, help='random seed to use')\n    parser.add_argument('--save', type=str, default='EXP', help='directory to save to')\n\n    args = parser.parse_args()\n    main(args)\n"
  }
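  ,
  {
    "path": "examples/make_untrained_spec_sketch.py",
    "content": "# Hypothetical setup sketch, not an original repo file: it writes the input\n# pickle that train_arch_runner.py expects. The only key that script reads is\n# 'spec'; my_arch below is a placeholder and must be replaced by an\n# architecture spec in the format accepted by Train().main from\n# ~/darts/cnn/train_class.py.\nimport os\nimport pickle\n\nmy_arch = None  # placeholder: substitute a real darts architecture spec\n\nos.makedirs('darts_test', exist_ok=True)\nwith open('darts_test/untrained_spec_0.pkl', 'wb') as f:\n    pickle.dump({'spec': my_arch}, f)\n\n# then, assuming the darts repo is set up on this machine:\n#   python3 train_arch_runner.py --untrained_filepath darts_test/untrained_spec_0.pkl \\\n#       --trained_filepath darts_test/trained_spec_0.pkl --epochs 1\n"
  }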
]